aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig10
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipset/Kconfig9
-rw-r--r--net/netfilter/ipset/Makefile1
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c15
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c15
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c15
-rw-r--r--net/netfilter/ipset/ip_set_core.c55
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h30
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c14
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c173
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c20
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c29
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c16
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c22
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c23
-rw-r--r--net/netfilter/ipvs/Kconfig10
-rw-r--r--net/netfilter/ipvs/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c74
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c15
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c223
-rw-r--r--net/netfilter/ipvs/ip_vs_dh.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_fo.c79
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c6
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_lc.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_nq.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_rr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sed.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_wlc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_wrr.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c388
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_expect.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c26
-rw-r--r--net/netfilter/nf_conntrack_standalone.c2
-rw-r--r--net/netfilter/nf_log_common.c2
-rw-r--r--net/netfilter/nf_nat_core.c5
-rw-r--r--net/netfilter/nf_queue.c4
-rw-r--r--net/netfilter/nf_tables_api.c601
-rw-r--r--net/netfilter/nfnetlink.c70
-rw-r--r--net/netfilter/nfnetlink_acct.c54
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nfnetlink_queue_core.c12
-rw-r--r--net/netfilter/nft_compat.c116
-rw-r--r--net/netfilter/nft_hash.c12
-rw-r--r--net/netfilter/nft_masq.c59
-rw-r--r--net/netfilter/nft_meta.c45
-rw-r--r--net/netfilter/nft_nat.c16
-rw-r--r--net/netfilter/nft_rbtree.c2
-rw-r--r--net/netfilter/nft_reject.c37
-rw-r--r--net/netfilter/nft_reject_inet.c94
-rw-r--r--net/netfilter/x_tables.c30
-rw-r--r--net/netfilter/xt_HMARK.c2
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/netfilter/xt_cluster.c3
-rw-r--r--net/netfilter/xt_connbytes.c2
-rw-r--r--net/netfilter/xt_hashlimit.c4
-rw-r--r--net/netfilter/xt_physdev.c3
-rw-r--r--net/netfilter/xt_set.c191
-rw-r--r--net/netfilter/xt_string.c1
73 files changed, 2075 insertions, 747 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index b5c1d3aadb41..ae5096ab65eb 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -496,6 +496,15 @@ config NFT_LIMIT
This option adds the "limit" expression that you can use to
ratelimit rule matchings.
+config NFT_MASQ
+ depends on NF_TABLES
+ depends on NF_CONNTRACK
+ depends on NF_NAT
+ tristate "Netfilter nf_tables masquerade support"
+ help
+ This option adds the "masquerade" expression that you can use
+ to perform NAT in the masquerade flavour.
+
config NFT_NAT
depends on NF_TABLES
depends on NF_CONNTRACK
@@ -847,6 +856,7 @@ config NETFILTER_XT_TARGET_TPROXY
tristate '"TPROXY" target transparent proxying support'
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
+ depends on (IPV6 || IPV6=n)
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index fad5fdba34e5..a9571be3f791 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
+obj-$(CONFIG_NFT_MASQ) += nft_masq.o
# generic X tables
obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 2f7f5c32c6f9..234a8ec82076 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET
To compile it as a module, choose M here. If unsure, say N.
+config IP_SET_HASH_MAC
+ tristate "hash:mac set support"
+ depends on IP_SET
+ help
+ This option adds the hash:mac set type support, by which
+ one can store MAC (ethernet address) elements in a set.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_SET_HASH_NETPORTNET
tristate "hash:net,port,net set support"
depends on IP_SET
diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile
index 231f10196cb9..3dbd5e958489 100644
--- a/net/netfilter/ipset/Makefile
+++ b/net/netfilter/ipset/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
obj-$(CONFIG_IP_SET_HASH_IPPORTNET) += ip_set_hash_ipportnet.o
+obj-$(CONFIG_IP_SET_HASH_MAC) += ip_set_hash_mac.o
obj-$(CONFIG_IP_SET_HASH_NET) += ip_set_hash_net.o
obj-$(CONFIG_IP_SET_HASH_NETPORT) += ip_set_hash_netport.o
obj-$(CONFIG_IP_SET_HASH_NETIFACE) += ip_set_hash_netiface.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index f2c7d83dc23f..6f024a8a1534 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -128,6 +128,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
return 0;
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(x, set), ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(x, set), ext, mext, flags);
return 1;
}
@@ -161,6 +163,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_counter(ext_counter(x, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(x, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 6f1f9f494808..55b083ec587a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -112,7 +113,7 @@ bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ip_adt_elem e = { };
+ struct bitmap_ip_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -132,14 +133,17 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
struct bitmap_ip *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
u32 ip = 0, ip_to = 0;
- struct bitmap_ip_adt_elem e = { };
+ struct bitmap_ip_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret = 0;
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -357,6 +361,9 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 740eabededd9..86104744b00f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -27,7 +27,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -203,7 +204,7 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = {};
+ struct bitmap_ipmac_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
@@ -232,7 +233,7 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct bitmap_ipmac *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_ipmac_adt_elem e = {};
+ struct bitmap_ipmac_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0;
int ret = 0;
@@ -240,7 +241,10 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -394,6 +398,9 @@ static struct ip_set_type bitmap_ipmac_type = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index cf99676e69f8..005dd36444c3 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -22,7 +22,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counter support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comment support added */
+/* 2 Comment support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -104,7 +105,7 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
{
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_port_adt_elem e = {};
+ struct bitmap_port_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
__be16 __port;
u16 port = 0;
@@ -129,7 +130,7 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
{
struct bitmap_port *map = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct bitmap_port_adt_elem e = {};
+ struct bitmap_port_adt_elem e = { .id = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port; /* wraparound */
u16 port_to;
@@ -139,7 +140,10 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -291,6 +295,9 @@ static struct ip_set_type bitmap_port_type = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index ec8114fae50b..912e5a05b79d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -101,7 +101,7 @@ load_settype(const char *name)
nfnl_unlock(NFNL_SUBSYS_IPSET);
pr_debug("try to load ip_set_%s\n", name);
if (request_module("ip_set_%s", name) < 0) {
- pr_warning("Can't find ip_set type %s\n", name);
+ pr_warn("Can't find ip_set type %s\n", name);
nfnl_lock(NFNL_SUBSYS_IPSET);
return false;
}
@@ -195,20 +195,19 @@ ip_set_type_register(struct ip_set_type *type)
int ret = 0;
if (type->protocol != IPSET_PROTOCOL) {
- pr_warning("ip_set type %s, family %s, revision %u:%u uses "
- "wrong protocol version %u (want %u)\n",
- type->name, family_name(type->family),
- type->revision_min, type->revision_max,
- type->protocol, IPSET_PROTOCOL);
+ pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n",
+ type->name, family_name(type->family),
+ type->revision_min, type->revision_max,
+ type->protocol, IPSET_PROTOCOL);
return -EINVAL;
}
ip_set_type_lock();
if (find_set_type(type->name, type->family, type->revision_min)) {
/* Duplicate! */
- pr_warning("ip_set type %s, family %s with revision min %u "
- "already registered!\n", type->name,
- family_name(type->family), type->revision_min);
+ pr_warn("ip_set type %s, family %s with revision min %u already registered!\n",
+ type->name, family_name(type->family),
+ type->revision_min);
ret = -EINVAL;
goto unlock;
}
@@ -228,9 +227,9 @@ ip_set_type_unregister(struct ip_set_type *type)
{
ip_set_type_lock();
if (!find_set_type(type->name, type->family, type->revision_min)) {
- pr_warning("ip_set type %s, family %s with revision min %u "
- "not registered\n", type->name,
- family_name(type->family), type->revision_min);
+ pr_warn("ip_set type %s, family %s with revision min %u not registered\n",
+ type->name, family_name(type->family),
+ type->revision_min);
goto unlock;
}
list_del_rcu(&type->list);
@@ -338,6 +337,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
.len = sizeof(unsigned long),
.align = __alignof__(unsigned long),
},
+ [IPSET_EXT_ID_SKBINFO] = {
+ .type = IPSET_EXT_SKBINFO,
+ .flag = IPSET_FLAG_WITH_SKBINFO,
+ .len = sizeof(struct ip_set_skbinfo),
+ .align = __alignof__(struct ip_set_skbinfo),
+ },
[IPSET_EXT_ID_COMMENT] = {
.type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY,
.flag = IPSET_FLAG_WITH_COMMENT,
@@ -383,6 +388,7 @@ int
ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
struct ip_set_ext *ext)
{
+ u64 fullmark;
if (tb[IPSET_ATTR_TIMEOUT]) {
if (!(set->extensions & IPSET_EXT_TIMEOUT))
return -IPSET_ERR_TIMEOUT;
@@ -403,7 +409,25 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_COMMENT;
ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]);
}
-
+ if (tb[IPSET_ATTR_SKBMARK]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
+ ext->skbmark = fullmark >> 32;
+ ext->skbmarkmask = fullmark & 0xffffffff;
+ }
+ if (tb[IPSET_ATTR_SKBPRIO]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbprio = be32_to_cpu(nla_get_be32(
+ tb[IPSET_ATTR_SKBPRIO]));
+ }
+ if (tb[IPSET_ATTR_SKBQUEUE]) {
+ if (!(set->extensions & IPSET_EXT_SKBINFO))
+ return -IPSET_ERR_SKBINFO;
+ ext->skbqueue = be16_to_cpu(nla_get_be16(
+ tb[IPSET_ATTR_SKBQUEUE]));
+ }
return 0;
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
@@ -478,7 +502,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
if (ret == -EAGAIN) {
/* Type requests element to be completed */
- pr_debug("element must be competed, ADD is triggered\n");
+ pr_debug("element must be completed, ADD is triggered\n");
write_lock_bh(&set->lock);
set->variant->kadt(set, skb, par, IPSET_ADD, opt);
write_unlock_bh(&set->lock);
@@ -1398,7 +1422,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
struct sk_buff *skb2;
struct nlmsgerr *errmsg;
- size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
+ size_t payload = min(SIZE_MAX,
+ sizeof(*errmsg) + nlmsg_len(nlh));
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
struct nlattr *cmdattr;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 61c7fb052802..fee7c64e4dd1 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -565,8 +565,8 @@ retry:
set->name, orig->htable_bits, htable_bits, orig);
if (!htable_bits) {
/* In case we have plenty of memory :-) */
- pr_warning("Cannot increase the hashsize of set %s further\n",
- set->name);
+ pr_warn("Cannot increase the hashsize of set %s further\n",
+ set->name);
return -IPSET_ERR_HASH_FULL;
}
t = ip_set_alloc(sizeof(*t)
@@ -651,8 +651,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
if (h->elements >= h->maxelem) {
if (net_ratelimit())
- pr_warning("Set %s is full, maxelem %u reached\n",
- set->name, h->maxelem);
+ pr_warn("Set %s is full, maxelem %u reached\n",
+ set->name, h->maxelem);
return -IPSET_ERR_HASH_FULL;
}
@@ -720,6 +720,8 @@ reuse_slot:
ip_set_init_counter(ext_counter(data, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(data, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
out:
rcu_read_unlock_bh();
@@ -797,6 +799,9 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set))
ip_set_update_counter(ext_counter(data, set),
ext, mext, flags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(data, set),
+ ext, mext, flags);
return mtype_do_data_match(data);
}
@@ -998,8 +1003,8 @@ mtype_list(const struct ip_set *set,
nla_put_failure:
nlmsg_trim(skb, incomplete);
if (unlikely(first == cb->args[IPSET_CB_ARG0])) {
- pr_warning("Can't list set %s: one bucket does not fit into "
- "a message. Please report it!\n", set->name);
+ pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n",
+ set->name);
cb->args[IPSET_CB_ARG0] = 0;
return -EMSGSIZE;
}
@@ -1049,8 +1054,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
struct HTYPE *h;
struct htable *t;
+#ifndef IP_SET_PROTO_UNDEF
if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
return -IPSET_ERR_INVALID_FAMILY;
+#endif
#ifdef IP_SET_HASH_WITH_MARKMASK
markmask = 0xffffffff;
@@ -1093,7 +1100,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (tb[IPSET_ATTR_MARKMASK]) {
markmask = ntohl(nla_get_u32(tb[IPSET_ATTR_MARKMASK]));
- if ((markmask > 4294967295u) || markmask == 0)
+ if (markmask == 0)
return -IPSET_ERR_INVALID_MARKMASK;
}
#endif
@@ -1132,25 +1139,32 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
rcu_assign_pointer(h->table, t);
set->data = h;
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
+#endif
set->variant = &IPSET_TOKEN(HTYPE, 4_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)));
+#ifndef IP_SET_PROTO_UNDEF
} else {
set->variant = &IPSET_TOKEN(HTYPE, 6_variant);
set->dsize = ip_set_elem_len(set, tb,
sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)));
}
+#endif
if (tb[IPSET_ATTR_TIMEOUT]) {
set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4)
+#endif
IPSET_TOKEN(HTYPE, 4_gc_init)(set,
IPSET_TOKEN(HTYPE, 4_gc));
+#ifndef IP_SET_PROTO_UNDEF
else
IPSET_TOKEN(HTYPE, 6_gc_init)(set,
IPSET_TOKEN(HTYPE, 6_gc));
+#endif
}
-
pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
set->name, jhash_size(t->htable_bits),
t->htable_bits, h->maxelem, set->data, t);
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index dd40607f878e..76959d79e9d1 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -26,7 +26,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support */
/* 2 Comments support */
-#define IPSET_TYPE_REV_MAX 3 /* Forceadd support */
+/* 3 Forceadd support */
+#define IPSET_TYPE_REV_MAX 4 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -84,7 +85,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip4_elem e = {};
+ struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
__be32 ip;
@@ -103,7 +104,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip4_elem e = {};
+ struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, hosts;
int ret = 0;
@@ -111,7 +112,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (unlikely(!tb[IPSET_ATTR_IP] ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -222,7 +226,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip6_elem e = {};
+ struct hash_ip6_elem e = { { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
@@ -239,7 +243,7 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ip6_elem e = {};
+ struct hash_ip6_elem e = { { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
int ret;
@@ -247,6 +251,9 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -295,6 +302,9 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 4eff0a297254..7abf9788cfa8 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -25,7 +25,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support */
+/* 1 Forceadd support */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Vytas Dauksa <vytas.dauksa@smoothwall.net>");
@@ -113,7 +114,10 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_attr_netorder(tb, IPSET_ATTR_MARK) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -244,6 +248,9 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -301,6 +308,9 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 7597b82a8b03..dcbcceb9a52f 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -28,7 +28,8 @@
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -94,7 +95,7 @@ hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport4_elem e = { };
+ struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -111,7 +112,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport4_elem e = { };
+ struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip, ip_to = 0, p = 0, port, port_to;
bool with_ports = false;
@@ -122,7 +123,10 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -258,7 +262,7 @@ hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport6_elem e = { };
+ struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -275,7 +279,7 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipport *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipport6_elem e = { };
+ struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
@@ -287,6 +291,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -370,6 +377,9 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 672655ffd573..7ef93fc887a1 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -28,7 +28,8 @@
/* 1 SCTP and UDPLITE support added */
/* 2 Counters support added */
/* 3 Comments support added */
-#define IPSET_TYPE_REV_MAX 4 /* Forceadd support added */
+/* 4 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 5 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -95,7 +96,7 @@ hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip4_elem e = { };
+ struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -113,7 +114,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip4_elem e = { };
+ struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip, ip_to = 0, p = 0, port, port_to;
bool with_ports = false;
@@ -124,7 +125,10 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -265,7 +269,7 @@ hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
enum ipset_adt adt, struct ip_set_adt_opt *opt)
{
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip6_elem e = { };
+ struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
@@ -283,7 +287,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
{
const struct hash_ipportip *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
- struct hash_ipportip6_elem e = { };
+ struct hash_ipportip6_elem e = { .ip = { .all = { 0 } } };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 port, port_to;
bool with_ports = false;
@@ -295,6 +299,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -382,6 +389,9 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7308d84f9277..b6012ad92781 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -30,7 +30,8 @@
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -179,7 +180,10 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -432,6 +436,9 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE) ||
tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_CIDR]))
return -IPSET_ERR_PROTOCOL;
@@ -541,6 +548,9 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
new file mode 100644
index 000000000000..65690b52a4d5
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -0,0 +1,173 @@
+/* Copyright (C) 2014 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
+#include <net/netlink.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN 0
+#define IPSET_TYPE_REV_MAX 0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
+IP_SET_MODULE_DESC("hash:mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:mac");
+
+/* Type specific function prefix */
+#define HTYPE hash_mac
+
+/* Member elements */
+struct hash_mac4_elem {
+ /* Zero valued IP addresses cannot be stored */
+ union {
+ unsigned char ether[ETH_ALEN];
+ __be32 foo[2];
+ };
+};
+
+/* Common functions */
+
+static inline bool
+hash_mac4_data_equal(const struct hash_mac4_elem *e1,
+ const struct hash_mac4_elem *e2,
+ u32 *multi)
+{
+ return ether_addr_equal(e1->ether, e2->ether);
+}
+
+static inline bool
+hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
+{
+ return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+}
+
+static inline void
+hash_mac4_data_next(struct hash_mac4_elem *next,
+ const struct hash_mac4_elem *e)
+{
+}
+
+#define MTYPE hash_mac4
+#define PF 4
+#define HOST_MASK 32
+#define IP_SET_EMIT_CREATE
+#define IP_SET_PROTO_UNDEF
+#include "ip_set_hash_gen.h"
+
+/* Zero valued element is not supported */
+static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
+
+static int
+hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+ const struct xt_action_param *par,
+ enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+ /* MAC can be src only */
+ if (!(opt->flags & IPSET_DIM_ONE_SRC))
+ return 0;
+
+ if (skb_mac_header(skb) < skb->head ||
+ (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+ return -EINVAL;
+
+ memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -EINVAL;
+ return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
+ enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+ ipset_adtfn adtfn = set->variant->adt[adt];
+ struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
+ struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+ int ret;
+
+ if (unlikely(!tb[IPSET_ATTR_ETHER] ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+ return -IPSET_ERR_PROTOCOL;
+
+ if (tb[IPSET_ATTR_LINENO])
+ *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+ memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+ if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
+ return -IPSET_ERR_HASH_ELEM;
+
+ return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_mac_type __read_mostly = {
+ .name = "hash:mac",
+ .protocol = IPSET_PROTOCOL,
+ .features = IPSET_TYPE_MAC,
+ .dimension = IPSET_DIM_ONE,
+ .family = NFPROTO_UNSPEC,
+ .revision_min = IPSET_TYPE_REV_MIN,
+ .revision_max = IPSET_TYPE_REV_MAX,
+ .create = hash_mac_create,
+ .create_policy = {
+ [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 },
+ [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 },
+ [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
+ [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+ },
+ .adt_policy = {
+ [IPSET_ATTR_ETHER] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+ [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+ [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
+ },
+ .me = THIS_MODULE,
+};
+
+static int __init
+hash_mac_init(void)
+{
+ return ip_set_type_register(&hash_mac_type);
+}
+
+static void __exit
+hash_mac_fini(void)
+{
+ ip_set_type_unregister(&hash_mac_type);
+}
+
+module_init(hash_mac_init);
+module_exit(hash_mac_fini);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 4c7d495783a3..6b3ac10ac2f1 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -27,7 +27,8 @@
/* 2 nomatch flag support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo mapping support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -150,7 +151,10 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -318,7 +322,10 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -377,6 +384,9 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index db2606805b35..35dd35873442 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -28,7 +28,8 @@
/* 2 /0 support added */
/* 3 Counters support added */
/* 4 Comments support added */
-#define IPSET_TYPE_REV_MAX 5 /* Forceadd support added */
+/* 5 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -236,7 +237,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
if (!nf_bridge)
@@ -281,7 +282,10 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -470,7 +474,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
ip6_netmask(&e.ip, e.cidr);
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
if (!nf_bridge)
@@ -514,7 +518,10 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -590,6 +597,9 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index 3e99987e4bf2..da00284b3571 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -24,7 +24,8 @@
#include <linux/netfilter/ipset/ip_set_hash.h>
#define IPSET_TYPE_REV_MIN 0
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -171,7 +172,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -203,7 +207,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
flags |= (IPSET_FLAG_NOMATCH << 16);
}
- if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] &&
+ if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] ||
tb[IPSET_ATTR_IP2_TO])) {
e.ip[0] = htonl(ip & ip_set_hostmask(e.cidr[0]));
e.ip[1] = htonl(ip2_from & ip_set_hostmask(e.cidr[1]));
@@ -219,9 +223,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
if (ip_to < ip)
swap(ip, ip_to);
- if (ip + UINT_MAX == ip_to)
+ if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- }
+ } else
+ ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
@@ -230,10 +235,10 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
if (ip2_to < ip2_from)
swap(ip2_from, ip2_to);
- if (ip2_from + UINT_MAX == ip2_to)
+ if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
-
- }
+ } else
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
if (retried)
ip = ntohl(h->next.ip[0]);
@@ -393,7 +398,10 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -461,6 +469,9 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 1c645fbd09c7..c0ddb58d19dc 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -29,7 +29,8 @@
/* 3 nomatch flag support added */
/* 4 Counters support added */
/* 5 Comments support added */
-#define IPSET_TYPE_REV_MAX 6 /* Forceadd support added */
+/* 6 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 7 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -172,7 +173,10 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -389,7 +393,10 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -489,6 +496,9 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index c0d2ba73f8b2..b8053d675fc3 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -26,7 +26,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 0 Comments support added */
-#define IPSET_TYPE_REV_MAX 1 /* Forceadd support added */
+/* 1 Forceadd support added */
+#define IPSET_TYPE_REV_MAX 2 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>");
@@ -189,7 +190,10 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -257,7 +261,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip, ip_to);
if (unlikely(ip + UINT_MAX == ip_to))
return -IPSET_ERR_HASH_RANGE;
- }
+ } else
+ ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
port_to = port = ntohs(e.port);
if (tb[IPSET_ATTR_PORT_TO]) {
@@ -275,7 +280,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(ip2_from, ip2_to);
if (unlikely(ip2_from + UINT_MAX == ip2_to))
return -IPSET_ERR_HASH_RANGE;
- }
+ } else
+ ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
if (retried)
ip = ntohl(h->next.ip[0]);
@@ -458,7 +464,10 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (unlikely(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_IP2_TO]))
return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -567,6 +576,9 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 3e2317f3cf68..f8f682806e36 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -17,7 +17,8 @@
#define IPSET_TYPE_REV_MIN 0
/* 1 Counters support added */
-#define IPSET_TYPE_REV_MAX 2 /* Comments support added */
+/* 2 Comments support added */
+#define IPSET_TYPE_REV_MAX 3 /* skbinfo support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -73,6 +74,10 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
ip_set_update_counter(ext_counter(e, set),
ext, &opt->ext,
cmdflags);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_get_skbinfo(ext_skbinfo(e, set),
+ ext, &opt->ext,
+ cmdflags);
return ret;
}
}
@@ -197,6 +202,8 @@ list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
return 0;
}
@@ -307,6 +314,8 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_counter(ext_counter(e, set), ext);
if (SET_WITH_COMMENT(set))
ip_set_init_comment(ext_comment(e, set), ext);
+ if (SET_WITH_SKBINFO(set))
+ ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
/* Set is already added to the list */
ip_set_put_byindex(map->net, d->id);
return 0;
@@ -378,7 +387,10 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
!ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
- !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+ !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
return -IPSET_ERR_PROTOCOL;
if (tb[IPSET_ATTR_LINENO])
@@ -597,7 +609,9 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
struct set_elem *e;
u32 i;
- map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL);
+ map = kzalloc(sizeof(*map) +
+ min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize,
+ GFP_KERNEL);
if (!map)
return false;
@@ -665,6 +679,9 @@ static struct ip_set_type list_set_type __read_mostly = {
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
[IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
+ [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
+ [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
},
.me = THIS_MODULE,
};
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 0c3b1670b0d1..3b6929dec748 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -152,6 +152,16 @@ config IP_VS_WLC
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.
+config IP_VS_FO
+ tristate "weighted failover scheduling"
+ ---help---
+ The weighted failover scheduling algorithm directs network
+ connections to the server with the highest weight that is
+ currently available.
+
+ If you want to compile it in kernel, say Y. To compile it as a
+ module, choose M here. If unsure, say N.
+
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
---help---
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index 34ee602ddb66..38b2723b2e3d 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
+obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 610e19c0e13f..b0f7b626b56d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -27,6 +27,7 @@
#include <linux/interrupt.h>
#include <linux/in.h>
+#include <linux/inet.h>
#include <linux/net.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -77,6 +78,13 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
+/* We need an addrstrlen that works with or without v6 */
+#ifdef CONFIG_IP_VS_IPV6
+#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
+#else
+#define IP_VS_ADDRSTRLEN (8+1)
+#endif
+
struct ip_vs_aligned_lock
{
spinlock_t l;
@@ -488,7 +496,12 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+#endif
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
@@ -514,7 +527,10 @@ static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
break;
case IP_VS_CONN_F_TUNNEL:
- cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ if (cp->daf == AF_INET6)
+ cp->packet_xmit = ip_vs_tunnel_xmit_v6;
+ else
+ cp->packet_xmit = ip_vs_tunnel_xmit;
break;
case IP_VS_CONN_F_DROUTE:
@@ -580,7 +596,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -616,7 +632,13 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
rcu_read_lock();
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+
+ /* This function is only invoked by the synchronization code. We do
+ * not currently support heterogeneous pools with synchronization,
+ * so we can make the assumption that the svc_af is the same as the
+ * dest_af
+ */
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
@@ -671,7 +693,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
@@ -740,7 +762,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
ntohs(ct->cport),
IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
ntohs(ct->vport),
- IP_VS_DBG_ADDR(ct->af, &ct->daddr),
+ IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
ntohs(ct->dport));
/*
@@ -848,7 +870,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
-ip_vs_conn_new(const struct ip_vs_conn_param *p,
+ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark)
{
@@ -867,6 +889,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
+ cp->daf = dest_af;
cp->protocol = p->protocol;
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
@@ -874,7 +897,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
&cp->vaddr, p->vaddr);
cp->vport = p->vport;
- ip_vs_addr_set(p->af, &cp->daddr, daddr);
+ ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
@@ -1036,6 +1059,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
+ char dbuf[IP_VS_ADDRSTRLEN];
if (!ip_vs_conn_net_eq(cp, net))
return 0;
@@ -1050,24 +1074,32 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
pe_data[len] = '\0';
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
- "%pI6 %04X %-11s %7lu%s\n",
+ "%s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
- " %08X %04X %-11s %7lu%s\n",
+ " %s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
@@ -1105,6 +1137,7 @@ static const char *ip_vs_origin_name(unsigned int flags)
static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
{
+ char dbuf[IP_VS_ADDRSTRLEN];
if (v == SEQ_START_TOKEN)
seq_puts(seq,
@@ -1117,12 +1150,21 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
return 0;
#ifdef CONFIG_IP_VS_IPV6
+ if (cp->daf == AF_INET6)
+ snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
+ else
+#endif
+ snprintf(dbuf, sizeof(dbuf), "%08X",
+ ntohl(cp->daddr.ip));
+
+#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
- seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
+ seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
- &cp->daddr.in6, ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
@@ -1130,11 +1172,11 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
- "%08X %04X %-11s %-6s %7lu\n",
+ "%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
- ntohl(cp->daddr.ip), ntohs(cp->dport),
+ dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 5c34e8d42e01..990decba1fe4 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -328,7 +328,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* This adds param.pe_data to the template,
* and thus param.pe_data will be destroyed
* when the template expires */
- ct = ip_vs_conn_new(&param, &dest->addr, dport,
+ ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
@@ -357,7 +357,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
src_port, &iph->daddr, dst_port, &param);
- cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
+ cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
+ skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
*ignored = -1;
@@ -479,7 +480,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0], &iph->daddr,
pptr[1], &p);
- cp = ip_vs_conn_new(&p, &dest->addr,
+ cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
if (!cp) {
@@ -491,9 +492,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
- IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
- IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
- IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));
ip_vs_conn_stats(cp, svc);
@@ -550,7 +551,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
- cp = ip_vs_conn_new(&p, &daddr, 0,
+ cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
NULL, skb->mark);
if (!cp)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index fd3f444a4f96..ac7ba689efe7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -574,8 +574,8 @@ bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
* Called under RCU lock.
*/
static struct ip_vs_dest *
-ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
@@ -583,9 +583,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* Find the destination for the given service
*/
list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
- if ((dest->af == svc->af)
- && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
- && (dest->port == dport)) {
+ if ((dest->af == dest_af) &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
+ (dest->port == dport)) {
/* HIT */
return dest;
}
@@ -602,7 +602,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* on the backup.
* Called under RCU lock, no refcnt is returned.
*/
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
+struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
@@ -613,14 +613,14 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
port = 0;
- dest = ip_vs_lookup_dest(svc, daddr, port);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
if (!dest)
- dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
+ dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
return dest;
}
@@ -657,8 +657,8 @@ static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
* scheduling.
*/
static struct ip_vs_dest *
-ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
- __be16 dport)
+ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
+ const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
@@ -671,11 +671,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
"dest->refcnt=%d\n",
dest->vfwmark,
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port),
atomic_read(&dest->refcnt));
- if (dest->af == svc->af &&
- ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
+ if (dest->af == dest_af &&
+ ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
dest->port == dport &&
dest->vfwmark == svc->fwmark &&
dest->protocol == svc->protocol &&
@@ -779,6 +779,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_scheduler *sched;
int conn_flags;
+ /* We cannot modify an address and change the address family */
+ BUG_ON(!add && udest->af != dest->af);
+
+ if (add && udest->af != svc->af)
+ ipvs->mixed_address_family_dests++;
+
/* set the weight and the flags */
atomic_set(&dest->weight, udest->weight);
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
@@ -816,6 +822,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
dest->u_threshold = udest->u_threshold;
dest->l_threshold = udest->l_threshold;
+ dest->af = udest->af;
+
spin_lock_bh(&dest->dst_lock);
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
@@ -847,7 +855,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
EnterFunction(2);
#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6) {
+ if (udest->af == AF_INET6) {
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
@@ -875,12 +883,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
u64_stats_init(&ip_vs_dest_stats->syncp);
}
- dest->af = svc->af;
+ dest->af = udest->af;
dest->protocol = svc->protocol;
dest->vaddr = svc->addr;
dest->vport = svc->port;
dest->vfwmark = svc->fwmark;
- ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
dest->port = udest->port;
atomic_set(&dest->activeconns, 0);
@@ -928,11 +936,11 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest != NULL) {
@@ -944,12 +952,12 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
* Check if the dest already exists in the trash and
* is from the same service
*/
- dest = ip_vs_trash_get_dest(svc, &daddr, dport);
+ dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);
if (dest != NULL) {
IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
"dest->refcnt=%d, service %u/%s:%u\n",
- IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
+ IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
atomic_read(&dest->refcnt),
dest->vfwmark,
IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
@@ -992,11 +1000,11 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}
- ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
+ ip_vs_addr_copy(udest->af, &daddr, &udest->addr);
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &daddr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
rcu_read_unlock();
if (dest == NULL) {
@@ -1055,6 +1063,9 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
list_del_rcu(&dest->n_list);
svc->num_dests--;
+ if (dest->af != svc->af)
+ net_ipvs(svc->net)->mixed_address_family_dests--;
+
if (svcupd) {
struct ip_vs_scheduler *sched;
@@ -1078,7 +1089,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/* We use function that requires RCU lock */
rcu_read_lock();
- dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
+ dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
rcu_read_unlock();
if (dest == NULL) {
@@ -2179,29 +2190,41 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
return 0;
}
+#define CMDID(cmd) (cmd - IP_VS_BASE_CTL)
+
+struct ip_vs_svcdest_user {
+ struct ip_vs_service_user s;
+ struct ip_vs_dest_user d;
+};
+
+static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = {
+ [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user),
+ [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user),
+ [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
+ [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user),
+ [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user),
+ [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user),
+};
-#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
-#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
-#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
- sizeof(struct ip_vs_dest_user))
-#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
-#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
-#define MAX_ARG_LEN SVCDEST_ARG_LEN
-
-static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
- [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
- [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
- [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
+union ip_vs_set_arglen {
+ struct ip_vs_service_user field_IP_VS_SO_SET_ADD;
+ struct ip_vs_service_user field_IP_VS_SO_SET_EDIT;
+ struct ip_vs_service_user field_IP_VS_SO_SET_DEL;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST;
+ struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST;
+ struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT;
+ struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON;
+ struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON;
+ struct ip_vs_service_user field_IP_VS_SO_SET_ZERO;
};
+#define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen)
+
static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
struct ip_vs_service_user *usvc_compat)
{
@@ -2232,6 +2255,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
udest->weight = udest_compat->weight;
udest->u_threshold = udest_compat->u_threshold;
udest->l_threshold = udest_compat->l_threshold;
+ udest->af = AF_INET;
}
static int
@@ -2239,7 +2263,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
struct net *net = sock_net(sk);
int ret;
- unsigned char arg[MAX_ARG_LEN];
+ unsigned char arg[MAX_SET_ARGLEN];
struct ip_vs_service_user *usvc_compat;
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
@@ -2247,16 +2271,15 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
struct ip_vs_dest_user_kern udest;
struct netns_ipvs *ipvs = net_ipvs(net);
+ BUILD_BUG_ON(sizeof(arg) > 255);
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
return -EINVAL;
- if (len < 0 || len > MAX_ARG_LEN)
- return -EINVAL;
- if (len != set_arglen[SET_CMDID(cmd)]) {
- pr_err("set_ctl: len %u != %u\n",
- len, set_arglen[SET_CMDID(cmd)]);
+ if (len != set_arglen[CMDID(cmd)]) {
+ IP_VS_DBG(1, "set_ctl: len %u != %u\n",
+ len, set_arglen[CMDID(cmd)]);
return -EINVAL;
}
@@ -2469,6 +2492,12 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
if (count >= get->num_dests)
break;
+ /* Cannot expose heterogeneous members via sockopt
+ * interface
+ */
+ if (dest->af != svc->af)
+ continue;
+
entry.addr = dest->addr.ip;
entry.port = dest->port;
entry.conn_flags = atomic_read(&dest->conn_flags);
@@ -2512,51 +2541,51 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
#endif
}
+static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = {
+ [CMDID(IP_VS_SO_GET_VERSION)] = 64,
+ [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo),
+ [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services),
+ [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry),
+ [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests),
+ [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user),
+ [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user),
+};
-#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
-#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
-#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
-#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
-#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
-#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
-#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
-
-static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
- [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
- [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
- [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
+union ip_vs_get_arglen {
+ char field_IP_VS_SO_GET_VERSION[64];
+ struct ip_vs_getinfo field_IP_VS_SO_GET_INFO;
+ struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES;
+ struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE;
+ struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS;
+ struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT;
+ struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2];
};
+#define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen)
+
static int
do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
- unsigned char arg[128];
+ unsigned char arg[MAX_GET_ARGLEN];
int ret = 0;
unsigned int copylen;
struct net *net = sock_net(sk);
struct netns_ipvs *ipvs = net_ipvs(net);
BUG_ON(!net);
+ BUILD_BUG_ON(sizeof(arg) > 255);
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
return -EINVAL;
- if (*len < get_arglen[GET_CMDID(cmd)]) {
- pr_err("get_ctl: len %u < %u\n",
- *len, get_arglen[GET_CMDID(cmd)]);
+ copylen = get_arglen[CMDID(cmd)];
+ if (*len < (int) copylen) {
+ IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen);
return -EINVAL;
}
- copylen = get_arglen[GET_CMDID(cmd)];
- if (copylen > 128)
- return -EINVAL;
-
if (copy_from_user(arg, user, copylen) != 0)
return -EFAULT;
/*
@@ -2766,6 +2795,7 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
+ [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
@@ -3021,7 +3051,8 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
atomic_read(&dest->inactconns)) ||
nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
- atomic_read(&dest->persistconns)))
+ atomic_read(&dest->persistconns)) ||
+ nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
goto nla_put_failure;
@@ -3102,6 +3133,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
struct nlattr *nla_addr, *nla_port;
+ struct nlattr *nla_addr_family;
/* Parse mandatory identifying destination fields first */
if (nla == NULL ||
@@ -3110,6 +3142,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
nla_port = attrs[IPVS_DEST_ATTR_PORT];
+ nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY];
if (!(nla_addr && nla_port))
return -EINVAL;
@@ -3119,6 +3152,11 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_be16(nla_port);
+ if (nla_addr_family)
+ udest->af = nla_get_u16(nla_addr_family);
+ else
+ udest->af = 0;
+
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
@@ -3223,6 +3261,12 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
+ /* The synchronization protocol is incompatible with mixed family
+ * services
+ */
+ if (net_ipvs(net)->mixed_address_family_dests > 0)
+ return -EINVAL;
+
return start_sync_thread(net,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
@@ -3346,6 +3390,35 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
need_full_dest);
if (ret)
goto out;
+
+ /* Old protocols did not allow the user to specify address
+ * family, so we set it to zero instead. We also didn't
+ * allow heterogeneous pools in the old code, so it's safe
+ * to assume that this will have the same address family as
+ * the service.
+ */
+ if (udest.af == 0)
+ udest.af = svc->af;
+
+ if (udest.af != svc->af) {
+ /* The synchronization protocol is incompatible
+ * with mixed family services
+ */
+ if (net_ipvs(net)->sync_state) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Which connection types do we support? */
+ switch (udest.conn_flags) {
+ case IP_VS_CONN_F_TUNNEL:
+ /* We are able to forward this */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ }
}
switch (cmd) {
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index c3b84546ea9e..6be5c538b71e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -234,7 +234,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
new file mode 100644
index 000000000000..e09874d02938
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -0,0 +1,79 @@
+/*
+ * IPVS: Weighted Fail Over module
+ *
+ * Authors: Kenny Mathis <kmathis@chokepoint.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * Kenny Mathis : added initial functionality based on weight
+ *
+ */
+
+#define KMSG_COMPONENT "IPVS"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <net/ip_vs.h>
+
+/* Weighted Fail Over Module */
+static struct ip_vs_dest *
+ip_vs_fo_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_dest *dest, *hweight = NULL;
+ int hw = 0; /* Track highest weight */
+
+ IP_VS_DBG(6, "ip_vs_fo_schedule(): Scheduling...\n");
+
+ /* Basic failover functionality
+ * Find virtual server with highest weight and send it traffic
+ */
+ list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
+ if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+ atomic_read(&dest->weight) > hw) {
+ hweight = dest;
+ hw = atomic_read(&dest->weight);
+ }
+ }
+
+ if (hweight) {
+ IP_VS_DBG_BUF(6, "FO: server %s:%u activeconns %d weight %d\n",
+ IP_VS_DBG_ADDR(hweight->af, &hweight->addr),
+ ntohs(hweight->port),
+ atomic_read(&hweight->activeconns),
+ atomic_read(&hweight->weight));
+ return hweight;
+ }
+
+ ip_vs_scheduler_err(svc, "no destination available");
+ return NULL;
+}
+
+static struct ip_vs_scheduler ip_vs_fo_scheduler = {
+ .name = "fo",
+ .refcnt = ATOMIC_INIT(0),
+ .module = THIS_MODULE,
+ .n_list = LIST_HEAD_INIT(ip_vs_fo_scheduler.n_list),
+ .schedule = ip_vs_fo_schedule,
+};
+
+static int __init ip_vs_fo_init(void)
+{
+ return register_ip_vs_scheduler(&ip_vs_fo_scheduler);
+}
+
+static void __exit ip_vs_fo_cleanup(void)
+{
+ unregister_ip_vs_scheduler(&ip_vs_fo_scheduler);
+ synchronize_rcu();
+}
+
+module_init(ip_vs_fo_init);
+module_exit(ip_vs_fo_cleanup);
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 77c173282f38..a64fa15790e5 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -233,7 +233,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
ip_vs_conn_fill_param(ip_vs_conn_net(cp),
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
- n_cp = ip_vs_conn_new(&p, &from, port,
+ /* As above, this is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
cp->dest, skb->mark);
@@ -396,7 +397,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
- n_cp = ip_vs_conn_new(&p, &cp->daddr,
+ /* This is ipv4 only */
+ n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr,
htons(ntohs(cp->dport)-1),
IP_VS_CONN_F_NFCT, cp->dest,
skb->mark);
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 547ff33c1efd..127f14046c51 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -199,11 +199,11 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
*/
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblc_entry *en;
- en = ip_vs_lblc_get(dest->af, tbl, daddr);
+ en = ip_vs_lblc_get(af, tbl, daddr);
if (en) {
if (en->dest == dest)
return en;
@@ -213,8 +213,8 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
ip_vs_dest_hold(dest);
@@ -521,13 +521,13 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblc_new(tbl, &iph->daddr, dest);
+ ip_vs_lblc_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 3f21a2f47de1..2229d2d8bbe0 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -362,18 +362,18 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
*/
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
- struct ip_vs_dest *dest)
+ u16 af, struct ip_vs_dest *dest)
{
struct ip_vs_lblcr_entry *en;
- en = ip_vs_lblcr_get(dest->af, tbl, daddr);
+ en = ip_vs_lblcr_get(af, tbl, daddr);
if (!en) {
en = kmalloc(sizeof(*en), GFP_ATOMIC);
if (!en)
return NULL;
- en->af = dest->af;
- ip_vs_addr_copy(dest->af, &en->addr, daddr);
+ en->af = af;
+ ip_vs_addr_copy(af, &en->addr, daddr);
en->lastuse = jiffies;
/* initialize its dest set */
@@ -706,13 +706,13 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
/* If we fail to create a cache entry, we'll just use the valid dest */
spin_lock_bh(&svc->sched_lock);
if (!tbl->dead)
- ip_vs_lblcr_new(tbl, &iph->daddr, dest);
+ ip_vs_lblcr_new(tbl, &iph->daddr, svc->af, dest);
spin_unlock_bh(&svc->sched_lock);
out:
IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->daddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
return dest;
}
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 2bdcb1cf2127..19a0769a989a 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -59,7 +59,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
else
IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d "
"inactconns %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->inactconns));
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 961a6de9bb29..a8b63401e773 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -107,7 +107,8 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
out:
IP_VS_DBG_BUF(6, "NQ: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 2f7ea7564044..5b84c0b56642 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -432,7 +432,7 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index e3a697234a98..8e92beb0cca9 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -510,7 +510,7 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
th->fin ? 'F' : '.',
th->ack ? 'A' : '.',
th->rst ? 'R' : '.',
- IP_VS_DBG_ADDR(cp->af, &cp->daddr),
+ IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr),
ntohs(cp->cport),
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 176b87c35e34..58bacfc461ee 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -95,7 +95,7 @@ stop:
spin_unlock_bh(&svc->sched_lock);
IP_VS_DBG_BUF(6, "RR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt), atomic_read(&dest->weight));
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index e446b9fa7424..f8e2d00f528b 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -108,7 +108,8 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "SED: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index cc65b2f42cd4..98a13433b68c 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -138,7 +138,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));
/* if the original dest is unavailable, loop around the table
* starting from ihash to find a new dest
@@ -153,7 +153,7 @@ ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
return dest;
IP_VS_DBG_BUF(6, "SH: selected unavailable "
"server %s:%d (offset %d), reselecting",
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port), roffset);
}
@@ -192,7 +192,7 @@ ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
RCU_INIT_POINTER(b->dest, dest);
IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
- i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ i, IP_VS_DBG_ADDR(dest->af, &dest->addr),
atomic_read(&dest->weight));
/* Don't move to next dest until filling weight */
@@ -342,7 +342,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
IP_VS_DBG_ADDR(svc->af, &iph->saddr),
- IP_VS_DBG_ADDR(svc->af, &dest->addr),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
return dest;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index eadffb29dec0..7162c86fd50d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -880,10 +880,17 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* but still handled.
*/
rcu_read_lock();
- dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
- param->vport, protocol, fwmark, flags);
+ /* This function is only invoked by the synchronization
+ * code. We do not currently support heterogeneous pools
+ * with synchronization, so we can make the assumption that
+ * the svc_af is the same as the dest_af
+ */
+ dest = ip_vs_find_dest(net, type, type, daddr, dport,
+ param->vaddr, param->vport, protocol,
+ fwmark, flags);
- cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+ cp = ip_vs_conn_new(param, type, daddr, dport, flags, dest,
+ fwmark);
rcu_read_unlock();
if (!cp) {
kfree(param->pe_data);
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index b5b4650d50a9..6b366fd90554 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -80,7 +80,8 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
IP_VS_DBG_BUF(6, "WLC: server %s:%u "
"activeconns %d refcnt %d weight %d overhead %d\n",
- IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port),
+ IP_VS_DBG_ADDR(least->af, &least->addr),
+ ntohs(least->port),
atomic_read(&least->activeconns),
atomic_read(&least->refcnt),
atomic_read(&least->weight), loh);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 0546cd572d6b..17e6d4406ca7 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -216,7 +216,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
found:
IP_VS_DBG_BUF(6, "WRR: server %s:%u "
"activeconns %d refcnt %d weight %d\n",
- IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
+ IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
atomic_read(&dest->activeconns),
atomic_read(&dest->refcnt),
atomic_read(&dest->weight));
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 56896a412bce..91f17c1eb8a2 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -157,18 +157,113 @@ retry:
return rt;
}
+#ifdef CONFIG_IP_VS_IPV6
+static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
+{
+ return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
+}
+#endif
+
+static inline bool crosses_local_route_boundary(int skb_af, struct sk_buff *skb,
+ int rt_mode,
+ bool new_rt_is_local)
+{
+ bool rt_mode_allow_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+ bool rt_mode_allow_non_local = !!(rt_mode & IP_VS_RT_MODE_LOCAL);
+ bool rt_mode_allow_redirect = !!(rt_mode & IP_VS_RT_MODE_RDR);
+ bool source_is_loopback;
+ bool old_rt_is_local;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ int addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
+
+ source_is_loopback =
+ (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+ (addr_type & IPV6_ADDR_LOOPBACK);
+ old_rt_is_local = __ip_vs_is_local_route6(
+ (struct rt6_info *)skb_dst(skb));
+ } else
+#endif
+ {
+ source_is_loopback = ipv4_is_loopback(ip_hdr(skb)->saddr);
+ old_rt_is_local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+ }
+
+ if (unlikely(new_rt_is_local)) {
+ if (!rt_mode_allow_local)
+ return true;
+ if (!rt_mode_allow_redirect && !old_rt_is_local)
+ return true;
+ } else {
+ if (!rt_mode_allow_non_local)
+ return true;
+ if (source_is_loopback)
+ return true;
+ }
+ return false;
+}
+
+static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
+{
+ struct sock *sk = skb->sk;
+ struct rtable *ort = skb_rtable(skb);
+
+ if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+}
+
+static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+ struct ip_vs_iphdr *ipvsh,
+ struct sk_buff *skb, int mtu)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ struct net *net = dev_net(skb_dst(skb)->dev);
+
+ if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
+ /* only send ICMP too big on first fragment */
+ if (!ipvsh->fragoffs)
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ IP_VS_DBG(1, "frag needed for %pI6c\n",
+ &ipv6_hdr(skb)->saddr);
+ return false;
+ }
+ } else
+#endif
+ {
+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
+ /* If we're going to tunnel the packet and pmtu discovery
+ * is disabled, we'll just fragment it anyway
+ */
+ if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
+ return true;
+
+ if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
+ skb->len > mtu && !skb_is_gso(skb))) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ IP_VS_DBG(1, "frag needed for %pI4\n",
+ &ip_hdr(skb)->saddr);
+ return false;
+ }
+ }
+
+ return true;
+}
+
/* Get route to destination or remote server */
static int
-__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
- __be32 daddr, int rt_mode, __be32 *ret_saddr)
+__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+ __be32 daddr, int rt_mode, __be32 *ret_saddr,
+ struct ip_vs_iphdr *ipvsh)
{
struct net *net = dev_net(skb_dst(skb)->dev);
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
- struct rtable *ort; /* Original route */
- struct iphdr *iph;
- __be16 df;
int mtu;
int local, noref = 1;
@@ -218,30 +313,14 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
}
local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
- (rt->rt_flags & RTCF_LOCAL) ?
- "local":"non-local", &daddr);
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI4\n", &dest->addr.ip);
goto err_put;
}
- iph = ip_hdr(skb);
- if (likely(!local)) {
- if (unlikely(ipv4_is_loopback(iph->saddr))) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI4 to non-local address, dest: %pI4\n",
- &iph->saddr, &daddr);
- goto err_put;
- }
- } else {
- ort = skb_rtable(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !(ort->rt_flags & RTCF_LOCAL)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
- "local requires NAT method, dest: %pI4\n",
- &iph->daddr, &daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
ip_rt_put(rt);
@@ -250,28 +329,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
mtu = dst_mtu(&rt->dst);
- df = iph->frag_off & htons(IP_DF);
} else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
}
- ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
- /* MTU check allowed? */
- df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- /* MTU checking */
- if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -295,12 +363,6 @@ err_unreach:
}
#ifdef CONFIG_IP_VS_IPV6
-
-static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
-{
- return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;
-}
-
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
struct in6_addr *ret_saddr, int do_xfrm)
@@ -339,14 +401,13 @@ out_err:
* Get route to destination or remote server
*/
static int
-__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
- struct rt6_info *ort; /* Original route */
struct dst_entry *dst;
int mtu;
int local, noref = 1;
@@ -393,32 +454,15 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
}
local = __ip_vs_is_local_route6(rt);
- if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
- rt_mode)) {
- IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
- local ? "local":"non-local", daddr);
+
+ if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
+ local))) {
+ IP_VS_DBG_RL("We are crossing local and non-local addresses"
+ " daddr=%pI6\n", &dest->addr.in6);
goto err_put;
}
- if (likely(!local)) {
- if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
- IPV6_ADDR_LOOPBACK)) {
- IP_VS_DBG_RL("Stopping traffic from loopback address "
- "%pI6c to non-local address, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->saddr, daddr);
- goto err_put;
- }
- } else {
- ort = (struct rt6_info *) skb_dst(skb);
- if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
- !__ip_vs_is_local_route6(ort)) {
- IP_VS_DBG_RL("Redirect from non-local address %pI6c "
- "to local requires NAT method, "
- "dest: %pI6c\n",
- &ipv6_hdr(skb)->daddr, daddr);
- goto err_put;
- }
+
+ if (unlikely(local)) {
/* skb to local stack, preserve old route */
if (!noref)
dst_release(&rt->dst);
@@ -429,28 +473,17 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
mtu = dst_mtu(&rt->dst);
else {
- struct sock *sk = skb->sk;
-
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
goto err_put;
}
- ort = (struct rt6_info *) skb_dst(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+ maybe_update_pmtu(skb_af, skb, mtu);
}
- if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
- if (!skb->dev)
- skb->dev = net->loopback_dev;
- /* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+ if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
- }
skb_dst_drop(skb);
if (noref) {
@@ -556,8 +589,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
- NULL) < 0)
+ if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+ IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
ip_send_check(iph);
@@ -586,7 +619,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+ if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
@@ -633,10 +666,10 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
was_input = rt_is_input_route(skb_rtable(skb));
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_RDR, NULL);
+ IP_VS_RT_MODE_RDR, NULL, ipvsh);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
@@ -721,8 +754,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR);
@@ -791,6 +824,81 @@ tx_error:
}
#endif
+/* When forwarding a packet, we must ensure that we've got enough headroom
+ * for the encapsulation packet in the skb. This also gives us an
+ * opportunity to figure out what the payload_len, dsfield, ttl, and df
+ * values should be, so that we won't need to look at the old ip header
+ * again
+ */
+static struct sk_buff *
+ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
+ unsigned int max_headroom, __u8 *next_protocol,
+ __u32 *payload_len, __u8 *dsfield, __u8 *ttl,
+ __be16 *df)
+{
+ struct sk_buff *new_skb = NULL;
+ struct iphdr *old_iph = NULL;
+#ifdef CONFIG_IP_VS_IPV6
+ struct ipv6hdr *old_ipv6h = NULL;
+#endif
+
+ if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
+ new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (!new_skb)
+ goto error;
+ consume_skb(skb);
+ skb = new_skb;
+ }
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (skb_af == AF_INET6) {
+ old_ipv6h = ipv6_hdr(skb);
+ *next_protocol = IPPROTO_IPV6;
+ if (payload_len)
+ *payload_len =
+ ntohs(old_ipv6h->payload_len) +
+ sizeof(*old_ipv6h);
+ *dsfield = ipv6_get_dsfield(old_ipv6h);
+ *ttl = old_ipv6h->hop_limit;
+ if (df)
+ *df = 0;
+ } else
+#endif
+ {
+ old_iph = ip_hdr(skb);
+ /* Copy DF, reset fragment offset and MF */
+ if (df)
+ *df = (old_iph->frag_off & htons(IP_DF));
+ *next_protocol = IPPROTO_IPIP;
+
+ /* fix old IP header checksum */
+ ip_send_check(old_iph);
+ *dsfield = ipv4_get_dsfield(old_iph);
+ *ttl = old_iph->ttl;
+ if (payload_len)
+ *payload_len = ntohs(old_iph->tot_len);
+ }
+
+ return skb;
+error:
+ kfree_skb(skb);
+ return ERR_PTR(-ENOMEM);
+}
+
+static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
+{
+ if (encaps_af == AF_INET) {
+ if (orig_af == AF_INET)
+ return SKB_GSO_IPIP;
+
+ return SKB_GSO_SIT;
+ }
+
+ /* GSO: we need to provide proper SKB_GSO_ value for IPv6:
+ * SKB_GSO_SIT/IPV6
+ */
+ return 0;
+}
/*
* IP Tunneling transmitter
@@ -819,9 +927,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct iphdr *old_iph = ip_hdr(skb);
- u8 tos = old_iph->tos;
- __be16 df;
+ __u8 next_protocol = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
+ __be16 df = 0;
+ __be16 *dfp = NULL;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
@@ -829,11 +939,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
- IP_VS_RT_MODE_TUNNEL, &saddr);
+ IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -844,29 +954,21 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rt = skb_rtable(skb);
tdev = rt->dst.dev;
- /* Copy DF, reset fragment offset and MF */
- df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
-
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
-
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ip_hdr(skb);
- }
-
- /* fix old IP header checksum */
- ip_send_check(old_iph);
+ /* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
+ dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, NULL, &dsfield,
+ &ttl, dfp);
+ if (IS_ERR(skb))
+ goto tx_error;
- skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP);
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET, cp->af));
if (IS_ERR(skb))
goto tx_error;
@@ -883,11 +985,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->version = 4;
iph->ihl = sizeof(struct iphdr)>>2;
iph->frag_off = df;
- iph->protocol = IPPROTO_IPIP;
- iph->tos = tos;
+ iph->protocol = next_protocol;
+ iph->tos = dsfield;
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
- iph->ttl = old_iph->ttl;
+ iph->ttl = ttl;
ip_select_ident(skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
@@ -920,7 +1022,10 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct rt6_info *rt; /* Route to the other host */
struct in6_addr saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
- struct ipv6hdr *old_iph = ipv6_hdr(skb);
+ __u8 next_protocol = 0;
+ __u32 payload_len = 0;
+ __u8 dsfield = 0;
+ __u8 ttl = 0;
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
@@ -928,7 +1033,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -948,19 +1053,14 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
- if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
- struct sk_buff *new_skb =
- skb_realloc_headroom(skb, max_headroom);
-
- if (!new_skb)
- goto tx_error;
- consume_skb(skb);
- skb = new_skb;
- old_iph = ipv6_hdr(skb);
- }
+ skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
+ &next_protocol, &payload_len,
+ &dsfield, &ttl, NULL);
+ if (IS_ERR(skb))
+ goto tx_error;
- /* GSO: we need to provide proper SKB_GSO_ value for IPv6 */
- skb = iptunnel_handle_offloads(skb, false, 0); /* SKB_GSO_SIT/IPV6 */
+ skb = iptunnel_handle_offloads(
+ skb, false, __tun_gso_type_mask(AF_INET6, cp->af));
if (IS_ERR(skb))
goto tx_error;
@@ -975,14 +1075,13 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
iph = ipv6_hdr(skb);
iph->version = 6;
- iph->nexthdr = IPPROTO_IPV6;
- iph->payload_len = old_iph->payload_len;
- be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
+ iph->nexthdr = next_protocol;
+ iph->payload_len = htons(payload_len);
memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
- ipv6_change_dsfield(iph, 0, ipv6_get_dsfield(old_iph));
+ ipv6_change_dsfield(iph, 0, dsfield);
iph->daddr = cp->daddr.in6;
iph->saddr = saddr;
- iph->hop_limit = old_iph->hop_limit;
+ iph->hop_limit = ttl;
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
@@ -1021,10 +1120,10 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
- IP_VS_RT_MODE_KNOWN_NH, NULL);
+ IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
if (local < 0)
goto tx_error;
if (local) {
@@ -1060,8 +1159,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0,
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL);
if (local < 0)
@@ -1128,7 +1227,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
+ local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ NULL, iph);
if (local < 0)
goto tx_error;
rt = skb_rtable(skb);
@@ -1219,8 +1319,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
- ipvsh, 0, rt_mode);
+ local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
rt = (struct rt6_info *) skb_dst(skb);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index de88c4ab5146..5016a6929085 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -142,7 +142,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
static u32 __hash_bucket(u32 hash, unsigned int size)
{
- return ((u64)hash * size) >> 32;
+ return reciprocal_scale(hash, size);
}
static u32 hash_bucket(u32 hash, const struct net *net)
@@ -358,7 +358,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
tstamp = nf_conn_tstamp_find(ct);
if (tstamp && tstamp->stop == 0)
- tstamp->stop = ktime_to_ns(ktime_get_real());
+ tstamp->stop = ktime_get_real_ns();
if (nf_ct_is_dying(ct))
goto delete;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index f87e8f68ad45..91a1837acd0e 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -83,7 +83,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
(((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
(__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
- return ((u64)hash * nf_ct_expect_hsize) >> 32;
+
+ return reciprocal_scale(hash, nf_ct_expect_hsize);
}
struct nf_conntrack_expect *
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 355a5c4ef763..1bd9ed9e62f6 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1737,7 +1737,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
}
tstamp = nf_conn_tstamp_find(ct);
if (tstamp)
- tstamp->start = ktime_to_ns(ktime_get_real());
+ tstamp->start = ktime_get_real_ns();
err = nf_conntrack_hash_check_insert(ct);
if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index d25f29377648..957c1db66652 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -14,6 +14,30 @@
static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ;
+static bool nf_generic_should_process(u8 proto)
+{
+ switch (proto) {
+#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE
+ case IPPROTO_SCTP:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE
+ case IPPROTO_DCCP:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
+ case IPPROTO_GRE:
+ return false;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE
+ case IPPROTO_UDPLITE:
+ return false;
+#endif
+ default:
+ return true;
+ }
+}
+
static inline struct nf_generic_net *generic_pernet(struct net *net)
{
return &net->ct.nf_ct_proto.generic;
@@ -67,7 +91,7 @@ static int generic_packet(struct nf_conn *ct,
static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb,
unsigned int dataoff, unsigned int *timeouts)
{
- return true;
+ return nf_generic_should_process(nf_ct_protonum(ct));
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index f641751dba9d..cf65a1e040dd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -101,7 +101,7 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
{
struct ct_iter_state *st = seq->private;
- st->time_now = ktime_to_ns(ktime_get_real());
+ st->time_now = ktime_get_real_ns();
rcu_read_lock();
return ct_get_idx(seq, *pos);
}
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index eeb8ef4ff1a3..a2233e77cf39 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -158,7 +158,7 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
'0' + loginfo->u.log.level, prefix,
in ? in->name : "",
out ? out->name : "");
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge) {
const struct net_device *physindev;
const struct net_device *physoutdev;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 552f97cd9fde..4e0b47831d43 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -126,7 +126,8 @@ hash_by_src(const struct net *net, u16 zone,
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
- return ((u64)hash * net->ct.nat_htable_size) >> 32;
+
+ return reciprocal_scale(hash, net->ct.nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -274,7 +275,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
}
var_ipp->all[i] = (__force __u32)
- htonl(minip + (((u64)j * dist) >> 32));
+ htonl(minip + reciprocal_scale(j, dist));
if (var_ipp->all[i] != range->max_addr.all[i])
full_range = true;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5d24b1fdb593..4c8b68e5fa16 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -52,7 +52,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(entry->indev);
if (entry->outdev)
dev_put(entry->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
@@ -77,7 +77,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(entry->indev);
if (entry->outdev)
dev_hold(entry->outdev);
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
struct net_device *physdev;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index deeb95fb7028..556a0dfa4abc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -127,6 +127,204 @@ static void nft_trans_destroy(struct nft_trans *trans)
kfree(trans);
}
+static void nf_tables_unregister_hooks(const struct nft_table *table,
+ const struct nft_chain *chain,
+ unsigned int hook_nops)
+{
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ chain->flags & NFT_BASE_CHAIN)
+ nf_unregister_hooks(nft_base_chain(chain)->ops, hook_nops);
+}
+
+/* Internal table flags */
+#define NFT_TABLE_INACTIVE (1 << 15)
+
+static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWTABLE)
+ ctx->table->flags |= NFT_TABLE_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
+static int nft_deltable(struct nft_ctx *ctx)
+{
+ int err;
+
+ err = nft_trans_table_add(ctx, NFT_MSG_DELTABLE);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&ctx->table->list);
+ return err;
+}
+
+static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWCHAIN)
+ ctx->chain->flags |= NFT_CHAIN_INACTIVE;
+
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ return 0;
+}
+
+static int nft_delchain(struct nft_ctx *ctx)
+{
+ int err;
+
+ err = nft_trans_chain_add(ctx, NFT_MSG_DELCHAIN);
+ if (err < 0)
+ return err;
+
+ ctx->table->use--;
+ list_del_rcu(&ctx->chain->list);
+
+ return err;
+}
+
+static inline bool
+nft_rule_is_active(struct net *net, const struct nft_rule *rule)
+{
+ return (rule->genmask & (1 << net->nft.gencursor)) == 0;
+}
+
+static inline int gencursor_next(struct net *net)
+{
+ return net->nft.gencursor+1 == 1 ? 1 : 0;
+}
+
+static inline int
+nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
+{
+ return (rule->genmask & (1 << gencursor_next(net))) == 0;
+}
+
+static inline void
+nft_rule_activate_next(struct net *net, struct nft_rule *rule)
+{
+ /* Now inactive, will be active in the future */
+ rule->genmask = (1 << net->nft.gencursor);
+}
+
+static inline void
+nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = (1 << gencursor_next(net));
+}
+
+static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
+{
+ rule->genmask = 0;
+}
+
+static int
+nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ /* You cannot delete the same rule twice */
+ if (nft_rule_is_active_next(ctx->net, rule)) {
+ nft_rule_deactivate_next(ctx->net, rule);
+ ctx->chain->use--;
+ return 0;
+ }
+ return -ENOENT;
+}
+
+static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_rule *rule)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
+ if (trans == NULL)
+ return NULL;
+
+ nft_trans_rule(trans) = rule;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return trans;
+}
+
+static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
+{
+ struct nft_trans *trans;
+ int err;
+
+ trans = nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule);
+ if (trans == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_delrule_deactivate(ctx, rule);
+ if (err < 0) {
+ nft_trans_destroy(trans);
+ return err;
+ }
+
+ return 0;
+}
+
+static int nft_delrule_by_chain(struct nft_ctx *ctx)
+{
+ struct nft_rule *rule;
+ int err;
+
+ list_for_each_entry(rule, &ctx->chain->rules, list) {
+ err = nft_delrule(ctx, rule);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+/* Internal set flag */
+#define NFT_SET_INACTIVE (1 << 15)
+
+static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ struct nft_trans *trans;
+
+ trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
+ if (trans == NULL)
+ return -ENOMEM;
+
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ nft_trans_set_id(trans) =
+ ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
+ set->flags |= NFT_SET_INACTIVE;
+ }
+ nft_trans_set(trans) = set;
+ list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+ return 0;
+}
+
+static int nft_delset(struct nft_ctx *ctx, struct nft_set *set)
+{
+ int err;
+
+ err = nft_trans_set_add(ctx, NFT_MSG_DELSET, set);
+ if (err < 0)
+ return err;
+
+ list_del_rcu(&set->list);
+ ctx->table->use--;
+
+ return err;
+}
+
/*
* Tables
*/
@@ -207,9 +405,9 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
-static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table)
+static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -222,7 +420,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
@@ -250,8 +448,8 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
- err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table);
+ err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -290,7 +488,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_table_info(skb,
+ if (nf_tables_fill_table_info(skb, net,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWTABLE,
@@ -309,9 +507,6 @@ done:
return skb->len;
}
-/* Internal table flags */
-#define NFT_TABLE_INACTIVE (1 << 15)
-
static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -345,7 +540,7 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_table_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
family, table);
if (err < 0)
@@ -443,21 +638,6 @@ err:
return ret;
}
-static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
- if (trans == NULL)
- return -ENOMEM;
-
- if (msg_type == NFT_MSG_NEWTABLE)
- ctx->table->flags |= NFT_TABLE_INACTIVE;
-
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return 0;
-}
-
static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -527,6 +707,67 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
return 0;
}
+static int nft_flush_table(struct nft_ctx *ctx)
+{
+ int err;
+ struct nft_chain *chain, *nc;
+ struct nft_set *set, *ns;
+
+ list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) {
+ ctx->chain = chain;
+
+ err = nft_delrule_by_chain(ctx);
+ if (err < 0)
+ goto out;
+
+ err = nft_delchain(ctx);
+ if (err < 0)
+ goto out;
+ }
+
+ list_for_each_entry_safe(set, ns, &ctx->table->sets, list) {
+ if (set->flags & NFT_SET_ANONYMOUS &&
+ !list_empty(&set->bindings))
+ continue;
+
+ err = nft_delset(ctx, set);
+ if (err < 0)
+ goto out;
+ }
+
+ err = nft_deltable(ctx);
+out:
+ return err;
+}
+
+static int nft_flush(struct nft_ctx *ctx, int family)
+{
+ struct nft_af_info *afi;
+ struct nft_table *table, *nt;
+ const struct nlattr * const *nla = ctx->nla;
+ int err = 0;
+
+ list_for_each_entry(afi, &ctx->net->nft.af_info, list) {
+ if (family != AF_UNSPEC && afi->family != family)
+ continue;
+
+ ctx->afi = afi;
+ list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ if (nla[NFTA_TABLE_NAME] &&
+ nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0)
+ continue;
+
+ ctx->table = table;
+
+ err = nft_flush_table(ctx);
+ if (err < 0)
+ goto out;
+ }
+ }
+out:
+ return err;
+}
+
static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -535,9 +776,13 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
struct nft_af_info *afi;
struct nft_table *table;
struct net *net = sock_net(skb->sk);
- int family = nfmsg->nfgen_family, err;
+ int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
+ nft_ctx_init(&ctx, skb, nlh, NULL, NULL, NULL, nla);
+ if (family == AF_UNSPEC || nla[NFTA_TABLE_NAME] == NULL)
+ return nft_flush(&ctx, family);
+
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
return PTR_ERR(afi);
@@ -547,16 +792,11 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
return PTR_ERR(table);
if (table->flags & NFT_TABLE_INACTIVE)
return -ENOENT;
- if (table->use > 0)
- return -EBUSY;
- nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
- err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
- if (err < 0)
- return err;
+ ctx.afi = afi;
+ ctx.table = table;
- list_del_rcu(&table->list);
- return 0;
+ return nft_flush_table(&ctx);
}
static void nf_tables_table_destroy(struct nft_ctx *ctx)
@@ -674,9 +914,9 @@ nla_put_failure:
return -ENOSPC;
}
-static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
- const struct nft_table *table,
+static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event, u32 flags,
+ int family, const struct nft_table *table,
const struct nft_chain *chain)
{
struct nlmsghdr *nlh;
@@ -690,7 +930,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
goto nla_put_failure;
@@ -748,8 +988,8 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
- err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -791,7 +1031,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_chain_info(skb, net,
+ NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWCHAIN,
NLM_F_MULTI,
@@ -850,7 +1091,7 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_chain_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
family, table, chain);
if (err < 0)
@@ -913,21 +1154,6 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
rcu_assign_pointer(chain->stats, newstats);
}
-static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
- if (trans == NULL)
- return -ENOMEM;
-
- if (msg_type == NFT_MSG_NEWCHAIN)
- ctx->chain->flags |= NFT_CHAIN_INACTIVE;
-
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
- return 0;
-}
-
static void nf_tables_chain_destroy(struct nft_chain *chain)
{
BUG_ON(chain->use > 0);
@@ -1157,11 +1383,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
list_add_tail_rcu(&chain->list, &table->chains);
return 0;
err2:
- if (!(table->flags & NFT_TABLE_F_DORMANT) &&
- chain->flags & NFT_BASE_CHAIN) {
- nf_unregister_hooks(nft_base_chain(chain)->ops,
- afi->nops);
- }
+ nf_tables_unregister_hooks(table, chain, afi->nops);
err1:
nf_tables_chain_destroy(chain);
return err;
@@ -1178,7 +1400,6 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_ctx ctx;
- int err;
afi = nf_tables_afinfo_lookup(net, family, false);
if (IS_ERR(afi))
@@ -1199,13 +1420,8 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
return -EBUSY;
nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
- err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
- if (err < 0)
- return err;
- table->use--;
- list_del_rcu(&chain->list);
- return 0;
+ return nft_delchain(&ctx);
}
/*
@@ -1432,8 +1648,9 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
.len = NFT_USERDATA_MAXLEN },
};
-static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, u32 flags, int family,
+static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq, int event,
+ u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
const struct nft_rule *rule)
@@ -1453,7 +1670,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
goto nla_put_failure;
@@ -1509,8 +1726,8 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
- err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
- ctx->afi->family, ctx->table,
+ err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
+ event, 0, ctx->afi->family, ctx->table,
ctx->chain, rule);
if (err < 0) {
kfree_skb(skb);
@@ -1527,41 +1744,6 @@ err:
return err;
}
-static inline bool
-nft_rule_is_active(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & (1 << net->nft.gencursor)) == 0;
-}
-
-static inline int gencursor_next(struct net *net)
-{
- return net->nft.gencursor+1 == 1 ? 1 : 0;
-}
-
-static inline int
-nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
-{
- return (rule->genmask & (1 << gencursor_next(net))) == 0;
-}
-
-static inline void
-nft_rule_activate_next(struct net *net, struct nft_rule *rule)
-{
- /* Now inactive, will be active in the future */
- rule->genmask = (1 << net->nft.gencursor);
-}
-
-static inline void
-nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
-{
- rule->genmask = (1 << gencursor_next(net));
-}
-
-static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
-{
- rule->genmask = 0;
-}
-
static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -1591,7 +1773,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
@@ -1657,7 +1839,7 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
if (!skb2)
return -ENOMEM;
- err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
+ err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
family, table, chain, rule);
if (err < 0)
@@ -1687,21 +1869,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
- struct nft_rule *rule)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
- if (trans == NULL)
- return NULL;
-
- nft_trans_rule(trans) = rule;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-
- return trans;
-}
-
#define NFT_RULE_MAXEXPRS 128
static struct nft_expr_info *info;
@@ -1823,7 +1990,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
err = -ENOMEM;
goto err2;
}
- nft_rule_disactivate_next(net, old_rule);
+ nft_rule_deactivate_next(net, old_rule);
chain->use--;
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
@@ -1867,33 +2034,6 @@ err1:
return err;
}
-static int
-nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
-{
- /* You cannot delete the same rule twice */
- if (nft_rule_is_active_next(ctx->net, rule)) {
- if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
- return -ENOMEM;
- nft_rule_disactivate_next(ctx->net, rule);
- ctx->chain->use--;
- return 0;
- }
- return -ENOENT;
-}
-
-static int nf_table_delrule_by_chain(struct nft_ctx *ctx)
-{
- struct nft_rule *rule;
- int err;
-
- list_for_each_entry(rule, &ctx->chain->rules, list) {
- err = nf_tables_delrule_one(ctx, rule);
- if (err < 0)
- return err;
- }
- return 0;
-}
-
static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -1932,14 +2072,14 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(rule))
return PTR_ERR(rule);
- err = nf_tables_delrule_one(&ctx, rule);
+ err = nft_delrule(&ctx, rule);
} else {
- err = nf_table_delrule_by_chain(&ctx);
+ err = nft_delrule_by_chain(&ctx);
}
} else {
list_for_each_entry(chain, &table->chains, list) {
ctx.chain = chain;
- err = nf_table_delrule_by_chain(&ctx);
+ err = nft_delrule_by_chain(&ctx);
if (err < 0)
break;
}
@@ -2183,7 +2323,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
@@ -2204,6 +2344,11 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (set->policy != NFT_SET_POL_PERFORMANCE) {
+ if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
+ goto nla_put_failure;
+ }
+
desc = nla_nest_start(skb, NFTA_SET_DESC);
if (desc == NULL)
goto nla_put_failure;
@@ -2322,8 +2467,6 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
return 0;
}
-#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
-
static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2398,26 +2541,6 @@ static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
return 0;
}
-static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
- struct nft_set *set)
-{
- struct nft_trans *trans;
-
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
- if (trans == NULL)
- return -ENOMEM;
-
- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
- nft_trans_set_id(trans) =
- ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
- set->flags |= NFT_SET_INACTIVE;
- }
- nft_trans_set(trans) = set;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
-
- return 0;
-}
-
static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -2551,6 +2674,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
+ set->policy = policy;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2611,13 +2735,7 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (!list_empty(&set->bindings))
return -EBUSY;
- err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
- if (err < 0)
- return err;
-
- list_del_rcu(&set->list);
- ctx.table->use--;
- return 0;
+ return nft_delset(&ctx, set);
}
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
@@ -2815,7 +2933,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx.afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx.net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
goto nla_put_failure;
@@ -2896,7 +3014,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = ctx->afi->family;
nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
+ nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
@@ -3183,6 +3301,87 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
return err;
}
+static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
+ u32 portid, u32 seq)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWGEN;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
+ if (nlh == NULL)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
+
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)))
+ goto nla_put_failure;
+
+ return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+ nlmsg_trim(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
+{
+ struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ struct sk_buff *skb2;
+ int err;
+
+ if (nlmsg_report(nlh) &&
+ !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
+ return 0;
+
+ err = -ENOBUFS;
+ skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ goto err;
+
+ err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq);
+ if (err < 0) {
+ kfree_skb(skb2);
+ goto err;
+ }
+
+ err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
+ NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
+err:
+ if (err < 0) {
+ nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+ err);
+ }
+ return err;
+}
+
+static int nf_tables_getgen(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const nla[])
+{
+ struct net *net = sock_net(skb->sk);
+ struct sk_buff *skb2;
+ int err;
+
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = nf_tables_fill_gen_info(skb2, net, NETLINK_CB(skb).portid,
+ nlh->nlmsg_seq);
+ if (err < 0)
+ goto err;
+
+ return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
+err:
+ kfree_skb(skb2);
+ return err;
+}
+
static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
[NFT_MSG_NEWTABLE] = {
.call_batch = nf_tables_newtable,
@@ -3259,6 +3458,9 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
+ [NFT_MSG_GETGEN] = {
+ .call = nf_tables_getgen,
+ },
};
static void nft_chain_commit_update(struct nft_trans *trans)
@@ -3352,11 +3554,9 @@ static int nf_tables_commit(struct sk_buff *skb)
break;
case NFT_MSG_DELCHAIN:
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
- trans->ctx.chain->flags & NFT_BASE_CHAIN) {
- nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
- trans->ctx.afi->nops);
- }
+ nf_tables_unregister_hooks(trans->ctx.table,
+ trans->ctx.chain,
+ trans->ctx.afi->nops);
break;
case NFT_MSG_NEWRULE:
nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
@@ -3418,6 +3618,8 @@ static int nf_tables_commit(struct sk_buff *skb)
call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
}
+ nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
+
return 0;
}
@@ -3479,11 +3681,9 @@ static int nf_tables_abort(struct sk_buff *skb)
} else {
trans->ctx.table->use--;
list_del_rcu(&trans->ctx.chain->list);
- if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
- trans->ctx.chain->flags & NFT_BASE_CHAIN) {
- nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
- trans->ctx.afi->nops);
- }
+ nf_tables_unregister_hooks(trans->ctx.table,
+ trans->ctx.chain,
+ trans->ctx.afi->nops);
}
break;
case NFT_MSG_DELCHAIN:
@@ -3963,6 +4163,7 @@ static void __exit nf_tables_module_exit(void)
{
unregister_pernet_subsys(&nf_tables_net_ops);
nfnetlink_subsys_unregister(&nf_tables_subsys);
+ rcu_barrier();
nf_tables_core_module_exit();
kfree(info);
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c138b8fbe280..6c5a915cfa75 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -222,6 +222,51 @@ replay:
}
}
+struct nfnl_err {
+ struct list_head head;
+ struct nlmsghdr *nlh;
+ int err;
+};
+
+static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err)
+{
+ struct nfnl_err *nfnl_err;
+
+ nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL);
+ if (nfnl_err == NULL)
+ return -ENOMEM;
+
+ nfnl_err->nlh = nlh;
+ nfnl_err->err = err;
+ list_add_tail(&nfnl_err->head, list);
+
+ return 0;
+}
+
+static void nfnl_err_del(struct nfnl_err *nfnl_err)
+{
+ list_del(&nfnl_err->head);
+ kfree(nfnl_err);
+}
+
+static void nfnl_err_reset(struct list_head *err_list)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head)
+ nfnl_err_del(nfnl_err);
+}
+
+static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb)
+{
+ struct nfnl_err *nfnl_err, *next;
+
+ list_for_each_entry_safe(nfnl_err, next, err_list, head) {
+ netlink_ack(skb, nfnl_err->nlh, nfnl_err->err);
+ nfnl_err_del(nfnl_err);
+ }
+}
+
static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
u_int16_t subsys_id)
{
@@ -230,6 +275,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
const struct nfnetlink_subsystem *ss;
const struct nfnl_callback *nc;
bool success = true, done = false;
+ static LIST_HEAD(err_list);
int err;
if (subsys_id >= NFNL_SUBSYS_COUNT)
@@ -287,6 +333,7 @@ replay:
type = nlh->nlmsg_type;
if (type == NFNL_MSG_BATCH_BEGIN) {
/* Malformed: Batch begin twice */
+ nfnl_err_reset(&err_list);
success = false;
goto done;
} else if (type == NFNL_MSG_BATCH_END) {
@@ -333,7 +380,8 @@ replay:
* original skb.
*/
if (err == -EAGAIN) {
- ss->abort(skb);
+ nfnl_err_reset(&err_list);
+ ss->abort(oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
goto replay;
@@ -341,11 +389,24 @@ replay:
}
ack:
if (nlh->nlmsg_flags & NLM_F_ACK || err) {
+ /* Errors are delivered once the full batch has been
+ * processed, this avoids that the same error is
+ * reported several times when replaying the batch.
+ */
+ if (nfnl_err_add(&err_list, nlh, err) < 0) {
+ /* We failed to enqueue an error, reset the
+ * list of errors and send OOM to userspace
+ * pointing to the batch header.
+ */
+ nfnl_err_reset(&err_list);
+ netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM);
+ success = false;
+ goto done;
+ }
/* We don't stop processing the batch on errors, thus,
* userspace gets all the errors that the batch
* triggers.
*/
- netlink_ack(skb, nlh, err);
if (err)
success = false;
}
@@ -357,10 +418,11 @@ ack:
}
done:
if (success && done)
- ss->commit(skb);
+ ss->commit(oskb);
else
- ss->abort(skb);
+ ss->abort(oskb);
+ nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id);
kfree_skb(nskb);
}
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 3ea0eacbd970..c18af2f63eef 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -40,6 +40,11 @@ struct nf_acct {
char data[0];
};
+struct nfacct_filter {
+ u32 value;
+ u32 mask;
+};
+
#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
#define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
@@ -181,6 +186,7 @@ static int
nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nf_acct *cur, *last;
+ const struct nfacct_filter *filter = cb->data;
if (cb->args[2])
return 0;
@@ -197,6 +203,10 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
last = NULL;
}
+
+ if (filter && (cur->flags & filter->mask) != filter->value)
+ continue;
+
if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
@@ -211,6 +221,38 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+static int nfnl_acct_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
+static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
+ [NFACCT_FILTER_MASK] = { .type = NLA_U32 },
+ [NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
+};
+
+static struct nfacct_filter *
+nfacct_filter_alloc(const struct nlattr * const attr)
+{
+ struct nfacct_filter *filter;
+ struct nlattr *tb[NFACCT_FILTER_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
+ if (!filter)
+ return ERR_PTR(-ENOMEM);
+
+ filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
+ filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
+
+ return filter;
+}
+
static int
nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
@@ -222,7 +264,18 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nfnl_acct_dump,
+ .done = nfnl_acct_done,
};
+
+ if (tb[NFACCT_FILTER]) {
+ struct nfacct_filter *filter;
+
+ filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
+ if (IS_ERR(filter))
+ return PTR_ERR(filter);
+
+ c.data = filter;
+ }
return netlink_dump_start(nfnl, skb, nlh, &c);
}
@@ -314,6 +367,7 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
[NFACCT_PKTS] = { .type = NLA_U64 },
[NFACCT_FLAGS] = { .type = NLA_U32 },
[NFACCT_QUOTA] = { .type = NLA_U64 },
+ [NFACCT_FILTER] = {.type = NLA_NESTED },
};
static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index a11c5ff2f720..b1e3a0579416 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -36,7 +36,7 @@
#include <linux/atomic.h>
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
#endif
@@ -429,7 +429,7 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
if (indev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
@@ -460,7 +460,7 @@ __build_packet_message(struct nfnl_log_net *log,
}
if (outdev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
@@ -640,7 +640,7 @@ nfulnl_log_packet(struct net *net,
+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 108120f216b1..a82077d9f59b 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -36,7 +36,7 @@
#include <linux/atomic.h>
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#include "../bridge/br_private.h"
#endif
@@ -302,7 +302,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
+ nla_total_size(sizeof(u_int32_t)) /* ifindex */
#endif
@@ -380,7 +380,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
indev = entry->indev;
if (indev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
goto nla_put_failure;
#else
@@ -410,7 +410,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
}
if (outdev) {
-#ifndef CONFIG_BRIDGE_NETFILTER
+#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
goto nla_put_failure;
#else
@@ -569,7 +569,7 @@ nf_queue_entry_dup(struct nf_queue_entry *e)
return NULL;
}
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* When called from bridge netfilter, skb->data must point to MAC header
* before calling skb_gso_segment(). Else, original MAC header is lost
* and segmented skbs will be sent to wrong destination.
@@ -763,7 +763,7 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
if (entry->outdev)
if (entry->outdev->ifindex == ifindex)
return 1;
-#ifdef CONFIG_BRIDGE_NETFILTER
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
if (entry->skb->nf_bridge->physindev &&
entry->skb->nf_bridge->physindev->ifindex == ifindex)
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 1840989092ed..7e2683c8a44a 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -101,26 +101,12 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
static void target_compat_from_user(struct xt_target *t, void *in, void *out)
{
-#ifdef CONFIG_COMPAT
- if (t->compat_from_user) {
- int pad;
-
- t->compat_from_user(out, in);
- pad = XT_ALIGN(t->targetsize) - t->targetsize;
- if (pad > 0)
- memset(out + t->targetsize, 0, pad);
- } else
-#endif
- memcpy(out, in, XT_ALIGN(t->targetsize));
-}
+ int pad;
-static inline int nft_compat_target_offset(struct xt_target *target)
-{
-#ifdef CONFIG_COMPAT
- return xt_compat_target_offset(target);
-#else
- return 0;
-#endif
+ memcpy(out, in, t->targetsize);
+ pad = XT_ALIGN(t->targetsize) - t->targetsize;
+ if (pad > 0)
+ memset(out + t->targetsize, 0, pad);
}
static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = {
@@ -208,34 +194,6 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
module_put(target->me);
}
-static int
-target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in)
-{
- int ret;
-
-#ifdef CONFIG_COMPAT
- if (t->compat_to_user) {
- mm_segment_t old_fs;
- void *out;
-
- out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC);
- if (out == NULL)
- return -ENOMEM;
-
- /* We want to reuse existing compat_to_user */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- t->compat_to_user(out, in);
- set_fs(old_fs);
- ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out);
- kfree(out);
- } else
-#endif
- ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in);
-
- return ret;
-}
-
static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct xt_target *target = expr->ops->data;
@@ -243,7 +201,7 @@ static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) ||
nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) ||
- target_dump_info(skb, target, info))
+ nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info))
goto nla_put_failure;
return 0;
@@ -341,17 +299,12 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
static void match_compat_from_user(struct xt_match *m, void *in, void *out)
{
-#ifdef CONFIG_COMPAT
- if (m->compat_from_user) {
- int pad;
-
- m->compat_from_user(out, in);
- pad = XT_ALIGN(m->matchsize) - m->matchsize;
- if (pad > 0)
- memset(out + m->matchsize, 0, pad);
- } else
-#endif
- memcpy(out, in, XT_ALIGN(m->matchsize));
+ int pad;
+
+ memcpy(out, in, m->matchsize);
+ pad = XT_ALIGN(m->matchsize) - m->matchsize;
+ if (pad > 0)
+ memset(out + m->matchsize, 0, pad);
}
static int
@@ -404,43 +357,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
module_put(match->me);
}
-static int
-match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in)
-{
- int ret;
-
-#ifdef CONFIG_COMPAT
- if (m->compat_to_user) {
- mm_segment_t old_fs;
- void *out;
-
- out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC);
- if (out == NULL)
- return -ENOMEM;
-
- /* We want to reuse existing compat_to_user */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- m->compat_to_user(out, in);
- set_fs(old_fs);
- ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out);
- kfree(out);
- } else
-#endif
- ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in);
-
- return ret;
-}
-
-static inline int nft_compat_match_offset(struct xt_match *match)
-{
-#ifdef CONFIG_COMPAT
- return xt_compat_match_offset(match);
-#else
- return 0;
-#endif
-}
-
static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
void *info = nft_expr_priv(expr);
@@ -448,7 +364,7 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) ||
- match_dump_info(skb, match, info))
+ nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info))
goto nla_put_failure;
return 0;
@@ -643,8 +559,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-ENOMEM);
nft_match->ops.type = &nft_match_type;
- nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) +
- nft_compat_match_offset(match));
+ nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
nft_match->ops.eval = nft_match_eval;
nft_match->ops.init = nft_match_init;
nft_match->ops.destroy = nft_match_destroy;
@@ -714,8 +629,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
return ERR_PTR(-ENOMEM);
nft_target->ops.type = &nft_target_type;
- nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) +
- nft_compat_target_offset(target));
+ nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
nft_target->ops.eval = nft_target_eval;
nft_target->ops.init = nft_target_init;
nft_target->ops.destroy = nft_target_destroy;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 28fb8f38e6ba..8892b7b6184a 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -180,15 +180,17 @@ static int nft_hash_init(const struct nft_set *set,
static void nft_hash_destroy(const struct nft_set *set)
{
const struct rhashtable *priv = nft_set_priv(set);
- const struct bucket_table *tbl;
+ const struct bucket_table *tbl = priv->tbl;
struct nft_hash_elem *he, *next;
unsigned int i;
- tbl = rht_dereference(priv->tbl, priv);
- for (i = 0; i < tbl->size; i++)
- rht_for_each_entry_safe(he, next, tbl->buckets[i], priv, node)
+ for (i = 0; i < tbl->size; i++) {
+ for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node);
+ he != NULL; he = next) {
+ next = rht_entry(he->node.next, struct nft_hash_elem, node);
nft_hash_elem_destroy(set, he);
-
+ }
+ }
rhashtable_destroy(priv);
}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
new file mode 100644
index 000000000000..6637bab00567
--- /dev/null
+++ b/net/netfilter/nft_masq.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nft_masq.h>
+
+const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
+ [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+};
+EXPORT_SYMBOL_GPL(nft_masq_policy);
+
+int nft_masq_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+
+ if (tb[NFTA_MASQ_FLAGS] == NULL)
+ return 0;
+
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_masq_init);
+
+int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_masq *priv = nft_expr_priv(expr);
+
+ if (priv->flags == 0)
+ return 0;
+
+ if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL_GPL(nft_masq_dump);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 852b178c6ae7..1e7c076ca63a 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -14,6 +14,10 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/smp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
@@ -124,6 +128,43 @@ void nft_meta_get_eval(const struct nft_expr *expr,
dest->data[0] = skb->secmark;
break;
#endif
+ case NFT_META_PKTTYPE:
+ if (skb->pkt_type != PACKET_LOOPBACK) {
+ dest->data[0] = skb->pkt_type;
+ break;
+ }
+
+ switch (pkt->ops->pf) {
+ case NFPROTO_IPV4:
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+ dest->data[0] = PACKET_MULTICAST;
+ else
+ dest->data[0] = PACKET_BROADCAST;
+ break;
+ case NFPROTO_IPV6:
+ if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
+ dest->data[0] = PACKET_MULTICAST;
+ else
+ dest->data[0] = PACKET_BROADCAST;
+ break;
+ default:
+ WARN_ON(1);
+ goto err;
+ }
+ break;
+ case NFT_META_CPU:
+ dest->data[0] = smp_processor_id();
+ break;
+ case NFT_META_IIFGROUP:
+ if (in == NULL)
+ goto err;
+ dest->data[0] = in->group;
+ break;
+ case NFT_META_OIFGROUP:
+ if (out == NULL)
+ goto err;
+ dest->data[0] = out->group;
+ break;
default:
WARN_ON(1);
goto err;
@@ -195,6 +236,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
#endif
+ case NFT_META_PKTTYPE:
+ case NFT_META_CPU:
+ case NFT_META_IIFGROUP:
+ case NFT_META_OIFGROUP:
break;
default:
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 79ff58cd36dc..799550b476fb 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -33,6 +33,7 @@ struct nft_nat {
enum nft_registers sreg_proto_max:8;
enum nf_nat_manip_type type:8;
u8 family;
+ u16 flags;
};
static void nft_nat_eval(const struct nft_expr *expr,
@@ -71,6 +72,8 @@ static void nft_nat_eval(const struct nft_expr *expr,
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
+ range.flags |= priv->flags;
+
data[NFT_REG_VERDICT].verdict =
nf_nat_setup_info(ct, &range, priv->type);
}
@@ -82,6 +85,7 @@ static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
[NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
+ [NFTA_NAT_FLAGS] = { .type = NLA_U32 },
};
static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
@@ -149,6 +153,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
} else
priv->sreg_proto_max = priv->sreg_proto_min;
+ if (tb[NFTA_NAT_FLAGS]) {
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+ }
+
return 0;
}
@@ -183,6 +193,12 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
htonl(priv->sreg_proto_max)))
goto nla_put_failure;
}
+
+ if (priv->flags != 0) {
+ if (nla_put_be32(skb, NFTA_NAT_FLAGS, htonl(priv->flags)))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e1836ff88199..46214f245665 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -234,13 +234,11 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct nft_rbtree_elem *rbe;
struct rb_node *node;
- spin_lock_bh(&nft_rbtree_lock);
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_rbtree_elem_destroy(set, rbe);
}
- spin_unlock_bh(&nft_rbtree_lock);
}
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index f3448c296446..ec8a456092a7 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -17,6 +17,8 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_reject.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
[NFTA_REJECT_TYPE] = { .type = NLA_U32 },
@@ -70,5 +72,40 @@ nla_put_failure:
}
EXPORT_SYMBOL_GPL(nft_reject_dump);
+static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = {
+ [NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH,
+ [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH,
+ [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH,
+ [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMP_PKT_FILTERED,
+};
+
+int nft_reject_icmp_code(u8 code)
+{
+ if (code > NFT_REJECT_ICMPX_MAX)
+ return -EINVAL;
+
+ return icmp_code_v4[code];
+}
+
+EXPORT_SYMBOL_GPL(nft_reject_icmp_code);
+
+
+static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = {
+ [NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE,
+ [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH,
+ [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH,
+ [NFT_REJECT_ICMPX_ADMIN_PROHIBITED] = ICMPV6_ADM_PROHIBITED,
+};
+
+int nft_reject_icmpv6_code(u8 code)
+{
+ if (code > NFT_REJECT_ICMPX_MAX)
+ return -EINVAL;
+
+ return icmp_code_v6[code];
+}
+
+EXPORT_SYMBOL_GPL(nft_reject_icmpv6_code);
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index b718a52a4654..7b5f9d58680a 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -14,17 +14,103 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
static void nft_reject_inet_eval(const struct nft_expr *expr,
struct nft_data data[NFT_REG_MAX + 1],
const struct nft_pktinfo *pkt)
{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
switch (pkt->ops->pf) {
case NFPROTO_IPV4:
- return nft_reject_ipv4_eval(expr, data, pkt);
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach(pkt->skb,
+ nft_reject_icmp_code(priv->icmp_code));
+ break;
+ }
+ break;
case NFPROTO_IPV6:
- return nft_reject_ipv6_eval(expr, data, pkt);
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_ICMPX_UNREACH:
+ nf_send_unreach6(net, pkt->skb,
+ nft_reject_icmpv6_code(priv->icmp_code),
+ pkt->ops->hooknum);
+ break;
+ }
+ break;
+ }
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+
+static int nft_reject_inet_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ int icmp_code;
+
+ if (tb[NFTA_REJECT_TYPE] == NULL)
+ return -EINVAL;
+
+ priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
+ return -EINVAL;
+
+ icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
+ if (priv->type == NFT_REJECT_ICMPX_UNREACH &&
+ icmp_code > NFT_REJECT_ICMPX_MAX)
+ return -EINVAL;
+
+ priv->icmp_code = icmp_code;
+ break;
+ case NFT_REJECT_TCP_RST:
+ break;
+ default:
+ return -EINVAL;
}
+ return 0;
+}
+
+static int nft_reject_inet_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_reject *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type)))
+ goto nla_put_failure;
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ case NFT_REJECT_ICMPX_UNREACH:
+ if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
+ goto nla_put_failure;
+ break;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
}
static struct nft_expr_type nft_reject_inet_type;
@@ -32,8 +118,8 @@ static const struct nft_expr_ops nft_reject_inet_ops = {
.type = &nft_reject_inet_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
.eval = nft_reject_inet_eval,
- .init = nft_reject_init,
- .dump = nft_reject_dump,
+ .init = nft_reject_inet_init,
+ .dump = nft_reject_inet_dump,
};
static struct nft_expr_type nft_reject_inet_type __read_mostly = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 272ae4d6fdf4..133eb4772f12 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1101,22 +1101,11 @@ static const struct seq_operations xt_match_seq_ops = {
static int xt_match_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_match_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_match_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
@@ -1165,22 +1154,11 @@ static const struct seq_operations xt_target_seq_ops = {
static int xt_target_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
struct nf_mttg_trav *trav;
- int ret;
-
- trav = kmalloc(sizeof(*trav), GFP_KERNEL);
- if (trav == NULL)
+ trav = __seq_open_private(file, &xt_target_seq_ops, sizeof(*trav));
+ if (!trav)
return -ENOMEM;
- ret = seq_open(file, &xt_target_seq_ops);
- if (ret < 0) {
- kfree(trav);
- return ret;
- }
-
- seq = file->private_data;
- seq->private = trav;
trav->nfproto = (unsigned long)PDE_DATA(inode);
return 0;
}
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 73b73f687c58..02afaf48a729 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -126,7 +126,7 @@ hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
hash = hash ^ (t->proto & info->proto_mask);
- return (((u64)hash * info->hmodulus) >> 32) + info->hoffset;
+ return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
}
static void
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 370adf622cef..604df6fae6fc 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -136,7 +136,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
cfg.est.interval = info->interval;
cfg.est.ewma_log = info->ewma_log;
- ret = gen_new_estimator(&est->bstats, &est->rstats,
+ ret = gen_new_estimator(&est->bstats, NULL, &est->rstats,
&est->lock, &cfg.opt);
if (ret < 0)
goto err2;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index f4af1bfafb1c..96fa26b20b67 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -55,7 +55,8 @@ xt_cluster_hash(const struct nf_conn *ct,
WARN_ON(1);
break;
}
- return (((u64)hash * info->total_nodes) >> 32);
+
+ return reciprocal_scale(hash, info->total_nodes);
}
static inline bool
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 1e634615ab9d..d4bec261e74e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -120,7 +120,7 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
* accounting is enabled, so complain in the hope that someone notices.
*/
if (!nf_ct_acct_enabled(par->net)) {
- pr_warning("Forcing CT accounting to be enabled\n");
+ pr_warn("Forcing CT accounting to be enabled\n");
nf_ct_set_acct(par->net, true);
}
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 47dc6836830a..05fbc2a0be46 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -135,7 +135,7 @@ hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
* give results between [0 and cfg.size-1] and same hash distribution,
* but using a multiply, less expensive than a divide
*/
- return ((u64)hash * ht->cfg.size) >> 32;
+ return reciprocal_scale(hash, ht->cfg.size);
}
static struct dsthash_ent *
@@ -943,7 +943,7 @@ static int __init hashlimit_mt_init(void)
sizeof(struct dsthash_ent), 0, 0,
NULL);
if (!hashlimit_cachep) {
- pr_warning("unable to create slab cache\n");
+ pr_warn("unable to create slab cache\n");
goto err2;
}
return 0;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index d7ca16b8b8df..f440f57a452f 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -13,6 +13,7 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_physdev.h>
#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/br_netfilter.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
@@ -87,6 +88,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_physdev_info *info = par->matchinfo;
+ br_netfilter_enable();
+
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return -EINVAL;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 80c2e2d603e0..5732cd64acc0 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -84,13 +84,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_warn("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
- pr_warning("Protocol error: set match dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
@@ -134,13 +133,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par)
index = ip_set_nfnl_get_byindex(par->net, info->match_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find set identified by id %u to match\n",
- info->match_set.index);
+ pr_warn("Cannot find set identified by id %u to match\n",
+ info->match_set.index);
return -ENOENT;
}
if (info->match_set.dim > IPSET_DIM_MAX) {
- pr_warning("Protocol error: set match dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: set match dimension is over the limit!\n");
ip_set_nfnl_put(par->net, info->match_set.index);
return -ERANGE;
}
@@ -230,8 +228,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_warn("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
@@ -239,8 +237,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_warn("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
@@ -248,8 +246,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
}
if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
- pr_warning("Protocol error: SET target dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -303,8 +300,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
if (info->add_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->add_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find add_set index %u as target\n",
- info->add_set.index);
+ pr_warn("Cannot find add_set index %u as target\n",
+ info->add_set.index);
return -ENOENT;
}
}
@@ -312,8 +309,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
if (info->del_set.index != IPSET_INVALID_ID) {
index = ip_set_nfnl_get_byindex(par->net, info->del_set.index);
if (index == IPSET_INVALID_ID) {
- pr_warning("Cannot find del_set index %u as target\n",
- info->del_set.index);
+ pr_warn("Cannot find del_set index %u as target\n",
+ info->del_set.index);
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
return -ENOENT;
@@ -321,8 +318,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par)
}
if (info->add_set.dim > IPSET_DIM_MAX ||
info->del_set.dim > IPSET_DIM_MAX) {
- pr_warning("Protocol error: SET target dimension "
- "is over the limit!\n");
+ pr_warn("Protocol error: SET target dimension is over the limit!\n");
if (info->add_set.index != IPSET_INVALID_ID)
ip_set_nfnl_put(par->net, info->add_set.index);
if (info->del_set.index != IPSET_INVALID_ID)
@@ -370,6 +366,140 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
#define set_target_v2_checkentry set_target_v1_checkentry
#define set_target_v2_destroy set_target_v1_destroy
+/* Revision 3 target */
+
+static unsigned int
+set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ ADT_OPT(add_opt, par->family, info->add_set.dim,
+ info->add_set.flags, info->flags, info->timeout);
+ ADT_OPT(del_opt, par->family, info->del_set.dim,
+ info->del_set.flags, 0, UINT_MAX);
+ ADT_OPT(map_opt, par->family, info->map_set.dim,
+ info->map_set.flags, 0, UINT_MAX);
+
+ int ret;
+
+ /* Normalize to fit into jiffies */
+ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
+ add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
+ add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_add(info->add_set.index, skb, par, &add_opt);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_del(info->del_set.index, skb, par, &del_opt);
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ map_opt.cmdflags |= info->flags & (IPSET_FLAG_MAP_SKBMARK |
+ IPSET_FLAG_MAP_SKBPRIO |
+ IPSET_FLAG_MAP_SKBQUEUE);
+ ret = match_set(info->map_set.index, skb, par, &map_opt,
+ info->map_set.flags & IPSET_INV_MATCH);
+ if (!ret)
+ return XT_CONTINUE;
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
+ skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
+ ^ (map_opt.ext.skbmark);
+ if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
+ skb->priority = map_opt.ext.skbprio;
+ if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
+ skb->dev &&
+ skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
+ skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+ }
+ return XT_CONTINUE;
+}
+
+
+static int
+set_target_v3_checkentry(const struct xt_tgchk_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+ ip_set_id_t index;
+
+ if (info->add_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->add_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find add_set index %u as target\n",
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->del_set.index != IPSET_INVALID_ID) {
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->del_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find del_set index %u as target\n",
+ info->del_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->add_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->map_set.index != IPSET_INVALID_ID) {
+ if (strncmp(par->table, "mangle", 7)) {
+ pr_warn("--map-set only usable from mangle table\n");
+ return -EINVAL;
+ }
+ if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) |
+ (info->flags & IPSET_FLAG_MAP_SKBQUEUE)) &&
+ !(par->hook_mask & (1 << NF_INET_FORWARD |
+ 1 << NF_INET_LOCAL_OUT |
+ 1 << NF_INET_POST_ROUTING))) {
+ pr_warn("mapping of prio or/and queue is allowed only"
+ "from OUTPUT/FORWARD/POSTROUTING chains\n");
+ return -EINVAL;
+ }
+ index = ip_set_nfnl_get_byindex(par->net,
+ info->map_set.index);
+ if (index == IPSET_INVALID_ID) {
+ pr_warn("Cannot find map_set index %u as target\n",
+ info->map_set.index);
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net,
+ info->del_set.index);
+ return -ENOENT;
+ }
+ }
+
+ if (info->add_set.dim > IPSET_DIM_MAX ||
+ info->del_set.dim > IPSET_DIM_MAX ||
+ info->map_set.dim > IPSET_DIM_MAX) {
+ pr_warn("Protocol error: SET target dimension "
+ "is over the limit!\n");
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+static void
+set_target_v3_destroy(const struct xt_tgdtor_param *par)
+{
+ const struct xt_set_info_target_v3 *info = par->targinfo;
+
+ if (info->add_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->add_set.index);
+ if (info->del_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->del_set.index);
+ if (info->map_set.index != IPSET_INVALID_ID)
+ ip_set_nfnl_put(par->net, info->map_set.index);
+}
+
+
static struct xt_match set_matches[] __read_mostly = {
{
.name = "set",
@@ -497,6 +627,27 @@ static struct xt_target set_targets[] __read_mostly = {
.destroy = set_target_v2_destroy,
.me = THIS_MODULE
},
+ /* --map-set support */
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV4,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
+ {
+ .name = "SET",
+ .revision = 3,
+ .family = NFPROTO_IPV6,
+ .target = set_target_v3,
+ .targetsize = sizeof(struct xt_set_info_target_v3),
+ .checkentry = set_target_v3_checkentry,
+ .destroy = set_target_v3_destroy,
+ .me = THIS_MODULE
+ },
};
static int __init xt_set_init(void)
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index d3c48b14ab94..5699adb97652 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -29,7 +29,6 @@ string_mt(const struct sk_buff *skb, struct xt_action_param *par)
struct ts_state state;
bool invert;
- memset(&state, 0, sizeof(struct ts_state));
invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
return (skb_find_text((struct sk_buff *)skb, conf->from_offset,