about | summary | refs | log | tree | commit | diff | stats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/Kconfig 23
-rw-r--r-- net/netfilter/Makefile 2
-rw-r--r-- net/netfilter/core.c 31
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netiface.c 32
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 101
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 182
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c 102
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c 23
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 15
-rw-r--r-- net/netfilter/nf_conntrack_acct.c 8
-rw-r--r-- net/netfilter/nf_conntrack_amanda.c 10
-rw-r--r-- net/netfilter/nf_conntrack_expect.c 4
-rw-r--r-- net/netfilter/nf_internals.h 11
-rw-r--r-- net/netfilter/nf_log_common.c 7
-rw-r--r-- net/netfilter/nf_queue.c 76
-rw-r--r-- net/netfilter/nf_tables_api.c 636
-rw-r--r-- net/netfilter/nf_tables_core.c 161
-rw-r--r-- net/netfilter/nfnetlink_log.c 35
-rw-r--r-- net/netfilter/nfnetlink_queue_core.c 69
-rw-r--r-- net/netfilter/nft_bitwise.c 37
-rw-r--r-- net/netfilter/nft_byteorder.c 40
-rw-r--r-- net/netfilter/nft_cmp.c 44
-rw-r--r-- net/netfilter/nft_compat.c 41
-rw-r--r-- net/netfilter/nft_counter.c 3
-rw-r--r-- net/netfilter/nft_ct.c 118
-rw-r--r-- net/netfilter/nft_dynset.c 265
-rw-r--r-- net/netfilter/nft_expr_template.c 94
-rw-r--r-- net/netfilter/nft_exthdr.c 23
-rw-r--r-- net/netfilter/nft_hash.c 327
-rw-r--r-- net/netfilter/nft_immediate.c 18
-rw-r--r-- net/netfilter/nft_limit.c 5
-rw-r--r-- net/netfilter/nft_log.c 4
-rw-r--r-- net/netfilter/nft_lookup.c 35
-rw-r--r-- net/netfilter/nft_meta.c 116
-rw-r--r-- net/netfilter/nft_nat.c 71
-rw-r--r-- net/netfilter/nft_payload.c 24
-rw-r--r-- net/netfilter/nft_queue.c 4
-rw-r--r-- net/netfilter/nft_rbtree.c 132
-rw-r--r-- net/netfilter/nft_redir.c 19
-rw-r--r-- net/netfilter/nft_reject_inet.c 11
-rw-r--r-- net/netfilter/xt_TPROXY.c 22
-rw-r--r-- net/netfilter/xt_cgroup.c 2
-rw-r--r-- net/netfilter/xt_physdev.c 35
-rw-r--r-- net/netfilter/xt_set.c 4
-rw-r--r-- net/netfilter/xt_socket.c 129
-rw-r--r-- net/netfilter/xt_string.c 3
46 files changed, 1987 insertions, 1167 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index b02660fa9eb0..f70e34a68f70 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -438,8 +438,10 @@ config NF_TABLES
To compile it as a module, choose M here.
+if NF_TABLES
+
config NF_TABLES_INET
- depends on NF_TABLES && IPV6
+ depends on IPV6
select NF_TABLES_IPV4
select NF_TABLES_IPV6
tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support"
@@ -447,21 +449,18 @@ config NF_TABLES_INET
This option enables support for a mixed IPv4/IPv6 "inet" table.
config NFT_EXTHDR
- depends on NF_TABLES
tristate "Netfilter nf_tables IPv6 exthdr module"
help
This option adds the "exthdr" expression that you can use to match
IPv6 extension headers.
config NFT_META
- depends on NF_TABLES
tristate "Netfilter nf_tables meta module"
help
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
config NFT_CT
- depends on NF_TABLES
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
@@ -469,42 +468,36 @@ config NFT_CT
connection tracking information such as the flow state.
config NFT_RBTREE
- depends on NF_TABLES
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
config NFT_HASH
- depends on NF_TABLES
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
config NFT_COUNTER
- depends on NF_TABLES
tristate "Netfilter nf_tables counter module"
help
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
config NFT_LOG
- depends on NF_TABLES
tristate "Netfilter nf_tables log module"
help
This option adds the "log" expression that you can use to log
packets matching some criteria.
config NFT_LIMIT
- depends on NF_TABLES
tristate "Netfilter nf_tables limit module"
help
This option adds the "limit" expression that you can use to
ratelimit rule matchings.
config NFT_MASQ
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables masquerade support"
@@ -513,7 +506,6 @@ config NFT_MASQ
to perform NAT in the masquerade flavour.
config NFT_REDIR
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables redirect support"
@@ -522,7 +514,6 @@ config NFT_REDIR
to perform NAT in the redirect flavour.
config NFT_NAT
- depends on NF_TABLES
depends on NF_CONNTRACK
select NF_NAT
tristate "Netfilter nf_tables nat module"
@@ -531,8 +522,6 @@ config NFT_NAT
typical Network Address Translation (NAT) packet transformations.
config NFT_QUEUE
- depends on NF_TABLES
- depends on NETFILTER_XTABLES
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
help
@@ -540,7 +529,6 @@ config NFT_QUEUE
infrastructure (also known as NFQUEUE) from nftables.
config NFT_REJECT
- depends on NF_TABLES
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
help
@@ -554,7 +542,6 @@ config NFT_REJECT_INET
tristate
config NFT_COMPAT
- depends on NF_TABLES
depends on NETFILTER_XTABLES
tristate "Netfilter x_tables over nf_tables module"
help
@@ -562,6 +549,8 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+endif # NF_TABLES
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
@@ -951,7 +940,7 @@ comment "Xtables matches"
config NETFILTER_XT_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
- depends on NETFILTER_ADVANCED
+ default m if NETFILTER_ADVANCED=n
---help---
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 89f73a9e9874..a87d8b8ec730 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# nf_tables
nf_tables-objs += nf_tables_core.o nf_tables_api.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index fea9ef566427..e6163017c42d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -120,12 +120,8 @@ EXPORT_SYMBOL(nf_unregister_hooks);
unsigned int nf_iterate(struct list_head *head,
struct sk_buff *skb,
- unsigned int hook,
- const struct net_device *indev,
- const struct net_device *outdev,
- struct nf_hook_ops **elemp,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
+ struct nf_hook_state *state,
+ struct nf_hook_ops **elemp)
{
unsigned int verdict;
@@ -134,19 +130,19 @@ unsigned int nf_iterate(struct list_head *head,
* function because of risk of continuing from deleted element.
*/
list_for_each_entry_continue_rcu((*elemp), head, list) {
- if (hook_thresh > (*elemp)->priority)
+ if (state->thresh > (*elemp)->priority)
continue;
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
+ verdict = (*elemp)->hook(*elemp, skb, state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
> NF_MAX_VERDICT)) {
NFDEBUG("Evil return from %p(%u).\n",
- (*elemp)->hook, hook);
+ (*elemp)->hook, state->hook);
continue;
}
#endif
@@ -161,11 +157,7 @@ repeat:
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
struct nf_hook_ops *elem;
unsigned int verdict;
@@ -174,10 +166,11 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
- elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list);
+ elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
+ struct nf_hook_ops, list);
next_hook:
- verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
- outdev, &elem, okfn, hook_thresh);
+ verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
+ &elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -186,8 +179,8 @@ next_hook:
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
- int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
- verdict >> NF_VERDICT_QBITS);
+ int err = nf_queue(skb, elem, state,
+ verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 758b002130d9..380ef5148ea1 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -19,6 +19,7 @@
#include <net/netlink.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
@@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
#include "ip_set_hash_gen.h"
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static const char *get_physindev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physindev(skb);
+
+ return dev ? dev->name : NULL;
+}
+
+static const char *get_phyoutdev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physoutdev(skb);
+
+ return dev ? dev->name : NULL;
+}
+#endif
+
static int
hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
e.ip &= ip_set_netmask(e.cidr);
#define IFACE(dir) (par->dir ? par->dir->name : NULL)
-#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL)
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
- if (!nf_bridge)
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
e.physdev = 1;
#else
e.iface = NULL;
@@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
- if (!nf_bridge)
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+
e.physdev = 1;
#else
e.iface = NULL;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b87ca32efa0b..5d2b806a862e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
}
@@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
}
}
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+ int conn_reuse_mode)
+{
+ /* Controlled (FTP DATA or persistence)? */
+ if (cp->control)
+ return false;
+
+ switch (cp->protocol) {
+ case IPPROTO_TCP:
+ return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+ ((conn_reuse_mode & 2) &&
+ (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+ (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+ case IPPROTO_SCTP:
+ return cp->state == IP_VS_SCTP_S_CLOSED;
+ default:
+ return false;
+ }
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@@ -1246,8 +1272,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
*/
static unsigned int
ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET);
}
@@ -1258,8 +1283,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET);
}
@@ -1273,8 +1297,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
@@ -1285,8 +1308,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
@@ -1585,6 +1607,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_conn *cp;
int ret, pkts;
struct netns_ipvs *ipvs;
+ int conn_reuse_mode;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1653,10 +1676,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
- if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
- is_new_conn(skb, &iph)) {
- ip_vs_conn_expire_now(cp);
+ conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+ if (conn_reuse_mode && !iph.fragoffs &&
+ is_new_conn(skb, &iph) && cp &&
+ ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) ||
+ unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
cp = NULL;
}
@@ -1738,9 +1765,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
static unsigned int
ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET);
}
@@ -1751,8 +1776,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET);
}
@@ -1765,9 +1789,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
@@ -1778,8 +1800,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
@@ -1798,8 +1819,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
int r;
struct net *net;
@@ -1820,8 +1840,7 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
int r;
struct net *net;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ed99448671c3..49532672f66d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net)
}
static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
-#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
spin_lock_bh(&src->lock);
@@ -747,13 +747,28 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
}
static void
+ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
+{
+ dst->conns = (u32)src->conns;
+ dst->inpkts = (u32)src->inpkts;
+ dst->outpkts = (u32)src->outpkts;
+ dst->inbytes = src->inbytes;
+ dst->outbytes = src->outbytes;
+ dst->cps = (u32)src->cps;
+ dst->inpps = (u32)src->inpps;
+ dst->outpps = (u32)src->outpps;
+ dst->inbps = (u32)src->inbps;
+ dst->outbps = (u32)src->outbps;
+}
+
+static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
/* get current counters as zero point, rates are zeroed */
-#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
IP_VS_ZERO_STATS_COUNTER(conns);
IP_VS_ZERO_STATS_COUNTER(inpkts);
@@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_reuse_mode",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = {
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
- struct ip_vs_stats_user show;
+ struct ip_vs_kstats show;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
" Conns Packets Packets Bytes Bytes\n");
ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
- show.inpkts, show.outpkts,
- (unsigned long long) show.inbytes,
- (unsigned long long) show.outbytes);
-
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)show.conns,
+ (unsigned long long)show.inpkts,
+ (unsigned long long)show.outpkts,
+ (unsigned long long)show.inbytes,
+ (unsigned long long)show.outbytes);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, "%8X %8X %8X %16X %16X\n",
- show.cps, show.inpps, show.outpps,
- show.inbps, show.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
+ (unsigned long long)show.cps,
+ (unsigned long long)show.inpps,
+ (unsigned long long)show.outpps,
+ (unsigned long long)show.inbps,
+ (unsigned long long)show.outbps);
return 0;
}
@@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
- struct ip_vs_stats_user rates;
+ struct ip_vs_kstats kstats;
int i;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
do {
start = u64_stats_fetch_begin_irq(&u->syncp);
- inbytes = u->ustats.inbytes;
- outbytes = u->ustats.outbytes;
+ conns = u->cnt.conns;
+ inpkts = u->cnt.inpkts;
+ outpkts = u->cnt.outpkts;
+ inbytes = u->cnt.inbytes;
+ outbytes = u->cnt.outbytes;
} while (u64_stats_fetch_retry_irq(&u->syncp, start));
- seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
- i, u->ustats.conns, u->ustats.inpkts,
- u->ustats.outpkts, (__u64)inbytes,
- (__u64)outbytes);
+ seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
+ i, (u64)conns, (u64)inpkts,
+ (u64)outpkts, (u64)inbytes,
+ (u64)outbytes);
}
- spin_lock_bh(&tot_stats->lock);
-
- seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
- tot_stats->ustats.conns, tot_stats->ustats.inpkts,
- tot_stats->ustats.outpkts,
- (unsigned long long) tot_stats->ustats.inbytes,
- (unsigned long long) tot_stats->ustats.outbytes);
-
- ip_vs_read_estimator(&rates, tot_stats);
+ ip_vs_copy_stats(&kstats, tot_stats);
- spin_unlock_bh(&tot_stats->lock);
+ seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)kstats.conns,
+ (unsigned long long)kstats.inpkts,
+ (unsigned long long)kstats.outpkts,
+ (unsigned long long)kstats.inbytes,
+ (unsigned long long)kstats.outbytes);
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, " %8X %8X %8X %16X %16X\n",
- rates.cps,
- rates.inpps,
- rates.outpps,
- rates.inbps,
- rates.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
+ kstats.cps,
+ kstats.inpps,
+ kstats.outpps,
+ kstats.inbps,
+ kstats.outbps);
return 0;
}
@@ -2400,6 +2426,7 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
+ struct ip_vs_kstats kstats;
sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol;
@@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
dst->num_dests = src->num_dests;
- ip_vs_copy_stats(&dst->stats, &src->stats);
+ ip_vs_copy_stats(&kstats, &src->stats);
+ ip_vs_export_stats_user(&dst->stats, &kstats);
}
static inline int
@@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
int count = 0;
struct ip_vs_dest *dest;
struct ip_vs_dest_entry entry;
+ struct ip_vs_kstats kstats;
memset(&entry, 0, sizeof(entry));
list_for_each_entry(dest, &svc->destinations, n_list) {
@@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
entry.activeconns = atomic_read(&dest->activeconns);
entry.inactconns = atomic_read(&dest->inactconns);
entry.persistconns = atomic_read(&dest->persistconns);
- ip_vs_copy_stats(&entry.stats, &dest->stats);
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ ip_vs_export_stats_user(&entry.stats, &kstats);
if (copy_to_user(&uptr->entrytable[count],
&entry, sizeof(entry))) {
ret = -EFAULT;
@@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
- struct ip_vs_stats *stats)
+ struct ip_vs_kstats *kstats)
{
- struct ip_vs_stats_user ustats;
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
if (!nl_stats)
return -EMSGSIZE;
- ip_vs_copy_stats(&ustats, stats);
-
- if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
+ if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
+ goto nla_put_failure;
+ nla_nest_end(skb, nl_stats);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_stats);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
+ struct ip_vs_kstats *kstats)
+{
+ struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
goto nla_put_failure;
nla_nest_end(skb, nl_stats);
@@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
+ struct ip_vs_kstats kstats;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+ ip_vs_copy_stats(&kstats, &svc->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_service);
@@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
struct nlattr *nl_dest;
+ struct ip_vs_kstats kstats;
nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
if (!nl_dest)
@@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
atomic_read(&dest->persistconns)) ||
nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_dest);
@@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_pmtu_disc = 1;
tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
tbl[idx++].data = &ipvs->sysctl_backup_only;
+ ipvs->sysctl_conn_reuse_mode = 1;
+ tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 1425e9a924c4..ef0eb0a8d552 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -45,17 +45,19 @@
NOTES.
- * The stored value for average bps is scaled by 2^5, so that maximal
- rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+ * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
- * A lot code is taken from net/sched/estimator.c
+ * Netlink users can see 64-bit values but sockopt users are restricted
+ to 32-bit values for conns, packets, bps, cps and pps.
+
+ * A lot of code is taken from net/core/gen_estimator.c
*/
/*
* Make a summary from each cpu
*/
-static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
@@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
+
if (add) {
- sum->conns += s->ustats.conns;
- sum->inpkts += s->ustats.inpkts;
- sum->outpkts += s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- inbytes = s->ustats.inbytes;
- outbytes = s->ustats.outbytes;
+ conns = s->cnt.conns;
+ inpkts = s->cnt.inpkts;
+ outpkts = s->cnt.outpkts;
+ inbytes = s->cnt.inbytes;
+ outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
+ sum->conns += conns;
+ sum->inpkts += inpkts;
+ sum->outpkts += outpkts;
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
add = true;
- sum->conns = s->ustats.conns;
- sum->inpkts = s->ustats.inpkts;
- sum->outpkts = s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- sum->inbytes = s->ustats.inbytes;
- sum->outbytes = s->ustats.outbytes;
+ sum->conns = s->cnt.conns;
+ sum->inpkts = s->cnt.inpkts;
+ sum->outpkts = s->cnt.outpkts;
+ sum->inbytes = s->cnt.inbytes;
+ sum->outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
}
}
@@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg)
{
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
- u32 n_conns;
- u32 n_inpkts, n_outpkts;
- u64 n_inbytes, n_outbytes;
- u32 rate;
+ u64 rate;
struct net *net = (struct net *)arg;
struct netns_ipvs *ipvs;
@@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg)
s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
- ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
- n_conns = s->ustats.conns;
- n_inpkts = s->ustats.inpkts;
- n_outpkts = s->ustats.outpkts;
- n_inbytes = s->ustats.inbytes;
- n_outbytes = s->ustats.outbytes;
+ ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
/* scaled by 2^10, but divided 2 seconds */
- rate = (n_conns - e->last_conns) << 9;
- e->last_conns = n_conns;
- e->cps += ((long)rate - (long)e->cps) >> 2;
-
- rate = (n_inpkts - e->last_inpkts) << 9;
- e->last_inpkts = n_inpkts;
- e->inpps += ((long)rate - (long)e->inpps) >> 2;
-
- rate = (n_outpkts - e->last_outpkts) << 9;
- e->last_outpkts = n_outpkts;
- e->outpps += ((long)rate - (long)e->outpps) >> 2;
-
- rate = (n_inbytes - e->last_inbytes) << 4;
- e->last_inbytes = n_inbytes;
- e->inbps += ((long)rate - (long)e->inbps) >> 2;
-
- rate = (n_outbytes - e->last_outbytes) << 4;
- e->last_outbytes = n_outbytes;
- e->outbps += ((long)rate - (long)e->outbps) >> 2;
+ rate = (s->kstats.conns - e->last_conns) << 9;
+ e->last_conns = s->kstats.conns;
+ e->cps += ((s64)rate - (s64)e->cps) >> 2;
+
+ rate = (s->kstats.inpkts - e->last_inpkts) << 9;
+ e->last_inpkts = s->kstats.inpkts;
+ e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
+
+ rate = (s->kstats.outpkts - e->last_outpkts) << 9;
+ e->last_outpkts = s->kstats.outpkts;
+ e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
+
+ /* scaled by 2^5, but divided by 2 seconds (hence << 4) */
+ rate = (s->kstats.inbytes - e->last_inbytes) << 4;
+ e->last_inbytes = s->kstats.inbytes;
+ e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
+
+ rate = (s->kstats.outbytes - e->last_outbytes) << 4;
+ e->last_outbytes = s->kstats.outbytes;
+ e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
@@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
- struct ip_vs_stats_user *u = &stats->ustats;
+ struct ip_vs_kstats *k = &stats->kstats;
/* reset counters, caller must hold the stats->lock lock */
- est->last_inbytes = u->inbytes;
- est->last_outbytes = u->outbytes;
- est->last_conns = u->conns;
- est->last_inpkts = u->inpkts;
- est->last_outpkts = u->outpkts;
+ est->last_inbytes = k->inbytes;
+ est->last_outbytes = k->outbytes;
+ est->last_conns = k->conns;
+ est->last_inpkts = k->inpkts;
+ est->last_outpkts = k->outpkts;
est->cps = 0;
est->inpps = 0;
est->outpps = 0;
@@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
}
/* Get decoded rates */
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
- struct ip_vs_stats *stats)
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
{
struct ip_vs_estimator *e = &stats->est;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d93ceeb3ef04..19b9cce6c210 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
- else
+ if (cp && ((cp->dport != dport) ||
+ !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+ if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ } else {
+ /* This is the expiration message for the
+ * connection that was already replaced, so we
+ * just ignore it.
+ */
+ __ip_vs_conn_put(cp);
+ kfree(param->pe_data);
+ return;
+ }
+ }
+ } else {
cp = ip_vs_ct_in_get(param);
+ }
if (cp) {
/* Free pe_data */
@@ -1388,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
mreq.imr_ifindex = dev->ifindex;
+ rtnl_lock();
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
+ rtnl_unlock();
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3aedbda7658a..19986ec5f21a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
struct sock *sk = skb->sk;
struct rtable *ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ if (!skb->dev && sk && sk_fullsock(sk))
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
@@ -536,8 +536,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_update_conntrack(skb, cp, 1);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
@@ -554,8 +554,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
@@ -924,7 +924,8 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct net *net = skb_net(skb);
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
@@ -991,7 +992,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = ttl;
- ip_select_ident(skb, NULL);
+ ip_select_ident(net, skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index a4b5e2a435ac..45da11afa785 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -47,9 +47,11 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
return 0;
counter = acct->counter;
- return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)atomic64_read(&counter[dir].packets),
- (unsigned long long)atomic64_read(&counter[dir].bytes));
+ seq_printf(s, "packets=%llu bytes=%llu ",
+ (unsigned long long)atomic64_read(&counter[dir].packets),
+ (unsigned long long)atomic64_read(&counter[dir].bytes));
+
+ return 0;
};
EXPORT_SYMBOL_GPL(seq_print_acct);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index b8b95f4027ca..57a26cc90c9f 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -88,7 +88,6 @@ static int amanda_help(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- struct ts_state ts;
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple *tuple;
unsigned int dataoff, start, stop, off, i;
@@ -113,23 +112,20 @@ static int amanda_help(struct sk_buff *skb,
return NF_ACCEPT;
}
- memset(&ts, 0, sizeof(ts));
start = skb_find_text(skb, dataoff, skb->len,
- search[SEARCH_CONNECT].ts, &ts);
+ search[SEARCH_CONNECT].ts);
if (start == UINT_MAX)
goto out;
start += dataoff + search[SEARCH_CONNECT].len;
- memset(&ts, 0, sizeof(ts));
stop = skb_find_text(skb, start, skb->len,
- search[SEARCH_NEWLINE].ts, &ts);
+ search[SEARCH_NEWLINE].ts);
if (stop == UINT_MAX)
goto out;
stop += start;
for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
- memset(&ts, 0, sizeof(ts));
- off = skb_find_text(skb, start, stop, search[i].ts, &ts);
+ off = skb_find_text(skb, start, stop, search[i].ts);
if (off == UINT_MAX)
continue;
off += start + search[i].len;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 91a1837acd0e..7a17070c5dab 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -561,7 +561,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
helper->expect_policy[expect->class].name);
}
- return seq_putc(s, '\n');
+ seq_putc(s, '\n');
+
+ return 0;
}
static const struct seq_operations exp_seq_ops = {
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 61a3c927e63c..ea7f36784b3d 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,16 +14,11 @@
/* core.c */
unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
- unsigned int hook, const struct net_device *indev,
- const struct net_device *outdev,
- struct nf_hook_ops **elemp,
- int (*okfn)(struct sk_buff *), int hook_thresh);
+ struct nf_hook_state *state, struct nf_hook_ops **elemp);
/* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf,
- unsigned int hook, struct net_device *indev,
- struct net_device *outdev, int (*okfn)(struct sk_buff *),
- unsigned int queuenum);
+int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
+ struct nf_hook_state *state, unsigned int queuenum);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a2233e77cf39..a5aa5967b8e1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -17,6 +17,7 @@
#include <net/route.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
@@ -133,7 +134,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
{
- if (!sk || sk->sk_state == TCP_TIME_WAIT)
+ if (!sk || !sk_fullsock(sk))
return;
read_lock_bh(&sk->sk_callback_lock);
@@ -163,10 +164,10 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
const struct net_device *physindev;
const struct net_device *physoutdev;
- physindev = skb->nf_bridge->physindev;
+ physindev = nf_bridge_get_physindev(skb);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
+ physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && out != physoutdev)
nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4c8b68e5fa16..2e88032cd5ad 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/protocol.h>
@@ -47,19 +48,25 @@ EXPORT_SYMBOL(nf_unregister_queue_handler);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
+ struct nf_hook_state *state = &entry->state;
+
/* Release those devices we held, or Alexey will kill me. */
- if (entry->indev)
- dev_put(entry->indev);
- if (entry->outdev)
- dev_put(entry->outdev);
+ if (state->in)
+ dev_put(state->in);
+ if (state->out)
+ dev_put(state->out);
+ if (state->sk)
+ sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+ struct net_device *physdev;
- if (nf_bridge->physindev)
- dev_put(nf_bridge->physindev);
- if (nf_bridge->physoutdev)
- dev_put(nf_bridge->physoutdev);
+ physdev = nf_bridge_get_physindev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
+ physdev = nf_bridge_get_physoutdev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
}
#endif
/* Drop reference to owner of hook which queued us. */
@@ -70,22 +77,25 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
+ struct nf_hook_state *state = &entry->state;
+
if (!try_module_get(entry->elem->owner))
return false;
- if (entry->indev)
- dev_hold(entry->indev);
- if (entry->outdev)
- dev_hold(entry->outdev);
+ if (state->in)
+ dev_hold(state->in);
+ if (state->out)
+ dev_hold(state->out);
+ if (state->sk)
+ sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
struct net_device *physdev;
- physdev = nf_bridge->physindev;
+ physdev = nf_bridge_get_physindev(entry->skb);
if (physdev)
dev_hold(physdev);
- physdev = nf_bridge->physoutdev;
+ physdev = nf_bridge_get_physoutdev(entry->skb);
if (physdev)
dev_hold(physdev);
}
@@ -100,12 +110,9 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
* through nf_reinject().
*/
int nf_queue(struct sk_buff *skb,
- struct nf_hook_ops *elem,
- u_int8_t pf, unsigned int hook,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- unsigned int queuenum)
+ struct nf_hook_ops *elem,
+ struct nf_hook_state *state,
+ unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -121,7 +128,7 @@ int nf_queue(struct sk_buff *skb,
goto err_unlock;
}
- afinfo = nf_get_afinfo(pf);
+ afinfo = nf_get_afinfo(state->pf);
if (!afinfo)
goto err_unlock;
@@ -134,11 +141,7 @@ int nf_queue(struct sk_buff *skb,
*entry = (struct nf_queue_entry) {
.skb = skb,
.elem = elem,
- .pf = pf,
- .hook = hook,
- .indev = indev,
- .outdev = outdev,
- .okfn = okfn,
+ .state = *state,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -184,30 +187,29 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
if (verdict == NF_ACCEPT) {
- afinfo = nf_get_afinfo(entry->pf);
+ afinfo = nf_get_afinfo(entry->state.pf);
if (!afinfo || afinfo->reroute(skb, entry) < 0)
verdict = NF_DROP;
}
+ entry->state.thresh = INT_MIN;
+
if (verdict == NF_ACCEPT) {
next_hook:
- verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
- skb, entry->hook,
- entry->indev, entry->outdev, &elem,
- entry->okfn, INT_MIN);
+ verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook],
+ skb, &entry->state, &elem);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
local_bh_disable();
- entry->okfn(skb);
+ entry->state.okfn(entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, elem, entry->pf, entry->hook,
- entry->indev, entry->outdev, entry->okfn,
- verdict >> NF_VERDICT_QBITS);
+ err = nf_queue(skb, elem, &entry->state,
+ verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ac1a9528dbf2..78af83bc9c8e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx)
static inline bool
nft_rule_is_active(struct net *net, const struct nft_rule *rule)
{
- return (rule->genmask & (1 << net->nft.gencursor)) == 0;
-}
-
-static inline int gencursor_next(struct net *net)
-{
- return net->nft.gencursor+1 == 1 ? 1 : 0;
+ return (rule->genmask & nft_genmask_cur(net)) == 0;
}
static inline int
nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
{
- return (rule->genmask & (1 << gencursor_next(net))) == 0;
+ return (rule->genmask & nft_genmask_next(net)) == 0;
}
static inline void
nft_rule_activate_next(struct net *net, struct nft_rule *rule)
{
/* Now inactive, will be active in the future */
- rule->genmask = (1 << net->nft.gencursor);
+ rule->genmask = nft_genmask_cur(net);
}
static inline void
nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
{
- rule->genmask = (1 << gencursor_next(net));
+ rule->genmask = nft_genmask_next(net);
}
static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
{
- rule->genmask &= ~(1 << gencursor_next(net));
+ rule->genmask &= ~nft_genmask_next(net);
}
static int
@@ -401,7 +396,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
}
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
- [NFTA_TABLE_NAME] = { .type = NLA_STRING },
+ [NFTA_TABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
@@ -686,26 +682,28 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
if (!try_module_get(afi->owner))
return -EAFNOSUPPORT;
- table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
- if (table == NULL) {
- module_put(afi->owner);
- return -ENOMEM;
- }
+ err = -ENOMEM;
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (table == NULL)
+ goto err1;
- nla_strlcpy(table->name, name, nla_len(name));
+ nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
- if (err < 0) {
- kfree(table);
- module_put(afi->owner);
- return err;
- }
+ if (err < 0)
+ goto err2;
+
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
+err2:
+ kfree(table);
+err1:
+ module_put(afi->owner);
+ return err;
}
static int nft_flush_table(struct nft_ctx *ctx)
@@ -1351,6 +1349,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
rcu_assign_pointer(basechain->stats, stats);
}
+ write_pnet(&basechain->pnet, net);
basechain->type = type;
chain = &basechain->chain;
@@ -1378,7 +1377,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
- chain->net = net;
chain->table = table;
nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
@@ -1547,6 +1545,23 @@ nla_put_failure:
return -1;
};
+/*
+ * nft_expr_dump - emit one expression as a nested netlink attribute
+ * @skb: message buffer being filled
+ * @attr: attribute type used for the enclosing nest
+ * @expr: expression to serialise
+ *
+ * Opens a nest of type @attr, lets nf_tables_fill_expr_info() write the
+ * expression into it and closes the nest again.
+ *
+ * Returns 0 on success, -1 if the nest cannot be started or the expression
+ * cannot be written. Nothing is done to the nest on the failure path.
+ */
+int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
+ const struct nft_expr *expr)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, attr);
+ if (!nest)
+ goto nla_put_failure;
+ if (nf_tables_fill_expr_info(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
struct nft_expr_info {
const struct nft_expr_ops *ops;
struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
@@ -1624,6 +1639,39 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
module_put(expr->ops->type->owner);
}
+/*
+ * nft_expr_init - parse a netlink expression and build a standalone instance
+ * @ctx: nf_tables context handed to the parser and to the expression init
+ * @nla: netlink attribute describing the expression
+ *
+ * Parses @nla, allocates a zeroed buffer of info.ops->size bytes and
+ * initialises the expression in it through nf_tables_newexpr().
+ *
+ * Returns the new expression on success. On failure returns an ERR_PTR()
+ * carrying the error of the step that failed (-ENOMEM when the allocation
+ * fails); nothing stays allocated in that case.
+ */
+struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
+ const struct nlattr *nla)
+{
+ struct nft_expr_info info;
+ struct nft_expr *expr;
+ int err;
+
+ err = nf_tables_expr_parse(ctx, nla, &info);
+ if (err < 0)
+ goto err1;
+
+ err = -ENOMEM;
+ expr = kzalloc(info.ops->size, GFP_KERNEL);
+ if (expr == NULL)
+ goto err2;
+
+ err = nf_tables_newexpr(ctx, &info, expr);
+ if (err < 0)
+ goto err3;
+
+ return expr;
+err3:
+ /* init failed: the buffer was never handed to the caller, so free it
+ * here instead of leaking it.
+ */
+ kfree(expr);
+err2:
+ /* presumably pairs with a reference taken by nf_tables_expr_parse() */
+ module_put(info.ops->type->owner);
+err1:
+ return ERR_PTR(err);
+}
+
+/*
+ * nft_expr_destroy - tear down and free a separately allocated expression
+ * @ctx: nf_tables context passed through to nf_tables_expr_destroy()
+ * @expr: expression to release
+ *
+ * Runs nf_tables_expr_destroy() on @expr and then frees the memory that
+ * holds it with kfree().
+ */
+void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
+{
+ nf_tables_expr_destroy(ctx, expr);
+ kfree(expr);
+}
+
/*
* Rules
*/
@@ -1705,12 +1753,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
if (list == NULL)
goto nla_put_failure;
nft_rule_for_each_expr(expr, next, rule) {
- struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
- if (elem == NULL)
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
goto nla_put_failure;
- if (nf_tables_fill_expr_info(skb, expr) < 0)
- goto nla_put_failure;
- nla_nest_end(skb, elem);
}
nla_nest_end(skb, list);
@@ -2161,7 +2205,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
features = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
- features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+ features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
}
bops = NULL;
@@ -2218,6 +2262,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_POLICY] = { .type = NLA_U32 },
[NFTA_SET_DESC] = { .type = NLA_NESTED },
[NFTA_SET_ID] = { .type = NLA_U32 },
+ [NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2368,6 +2414,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (set->timeout &&
+ nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
+ goto nla_put_failure;
+ if (set->gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ goto nla_put_failure;
+
if (set->policy != NFT_SET_POL_PERFORMANCE) {
if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
goto nla_put_failure;
@@ -2580,7 +2633,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, dtype, flags, policy;
+ u64 timeout;
+ u32 ktype, dtype, flags, policy, gc_int;
struct nft_set_desc desc;
int err;
@@ -2600,15 +2654,20 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
}
desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
- if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
+ if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;
flags = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
- NFT_SET_INTERVAL | NFT_SET_MAP))
+ NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
+ NFT_SET_MAP | NFT_SET_EVAL))
return -EINVAL;
+ /* NFT_SET_MAP and NFT_SET_EVAL are mutually exclusive */
+ if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) ==
+ (NFT_SET_MAP | NFT_SET_EVAL))
+ return -EOPNOTSUPP;
}
dtype = 0;
@@ -2625,14 +2684,26 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
- if (desc.dlen == 0 ||
- desc.dlen > FIELD_SIZEOF(struct nft_data, data))
+ if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;
} else
- desc.dlen = sizeof(struct nft_data);
+ desc.dlen = sizeof(struct nft_verdict);
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ timeout = 0;
+ if (nla[NFTA_SET_TIMEOUT] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+ }
+ gc_int = 0;
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
@@ -2692,6 +2763,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
goto err2;
INIT_LIST_HEAD(&set->bindings);
+ write_pnet(&set->pnet, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
@@ -2700,6 +2772,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->flags = flags;
set->size = desc.size;
set->policy = policy;
+ set->timeout = timeout;
+ set->gc_int = gc_int;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2768,12 +2842,14 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
enum nft_registers dreg;
dreg = nft_type_to_reg(set->dtype);
- return nft_validate_data_load(ctx, dreg, &elem->data,
- set->dtype == NFT_DATA_VERDICT ?
- NFT_DATA_VERDICT : NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext),
+ set->dtype == NFT_DATA_VERDICT ?
+ NFT_DATA_VERDICT : NFT_DATA_VALUE,
+ set->dlen);
}
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
@@ -2785,12 +2861,13 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
return -EBUSY;
- if (set->flags & NFT_SET_MAP) {
+ if (binding->flags & NFT_SET_MAP) {
/* If the set is already bound to the same chain all
* jumps are already validated for that chain.
*/
list_for_each_entry(i, &set->bindings, list) {
- if (i->chain == binding->chain)
+ if (binding->flags & NFT_SET_MAP &&
+ i->chain == binding->chain)
goto bind;
}
@@ -2824,6 +2901,35 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
nf_tables_set_destroy(ctx, set);
}
+/*
+ * Layout description of the set element extensions, indexed by
+ * NFT_SET_EXT_*. Each entry gives the required alignment and, where set,
+ * the size of the extension. KEY, DATA and EXPR carry no .len here, so it
+ * is left zero for them; their size is presumably supplied per element
+ * (e.g. through nft_set_ext_add_length()) - TODO confirm against the header.
+ */
+const struct nft_set_ext_type nft_set_ext_types[] = {
+ [NFT_SET_EXT_KEY] = {
+ .align = __alignof__(u32),
+ },
+ [NFT_SET_EXT_DATA] = {
+ .align = __alignof__(u32),
+ },
+ [NFT_SET_EXT_EXPR] = {
+ .align = __alignof__(struct nft_expr),
+ },
+ [NFT_SET_EXT_FLAGS] = {
+ .len = sizeof(u8),
+ .align = __alignof__(u8),
+ },
+ [NFT_SET_EXT_TIMEOUT] = {
+ .len = sizeof(u64),
+ .align = __alignof__(u64),
+ },
+ [NFT_SET_EXT_EXPIRATION] = {
+ .len = sizeof(unsigned long),
+ .align = __alignof__(unsigned long),
+ },
+ [NFT_SET_EXT_USERDATA] = {
+ .len = sizeof(struct nft_userdata),
+ .align = __alignof__(struct nft_userdata),
+ },
+};
+EXPORT_SYMBOL_GPL(nft_set_ext_types);
+
/*
* Set elements
*/
@@ -2832,6 +2938,9 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2870,6 +2979,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -2877,20 +2987,52 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
if (nest == NULL)
goto nla_put_failure;
- if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
- set->klen) < 0)
+ if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
+ NFT_DATA_VALUE, set->klen) < 0)
goto nla_put_failure;
- if (set->flags & NFT_SET_MAP &&
- !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
- nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
set->dlen) < 0)
goto nla_put_failure;
- if (elem->flags != 0)
- if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) &&
+ nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
+ htonl(*nft_set_ext_flags(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ cpu_to_be64(*nft_set_ext_timeout(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ unsigned long expires, now = jiffies;
+
+ expires = *nft_set_ext_expiration(ext);
+ if (time_before(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ cpu_to_be64(jiffies_to_msecs(expires))))
goto nla_put_failure;
+ }
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+ struct nft_userdata *udata;
+
+ udata = nft_set_ext_userdata(ext);
+ if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+ udata->len + 1, udata->data))
+ goto nla_put_failure;
+ }
nla_nest_end(skb, nest);
return 0;
@@ -3111,20 +3253,65 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
return trans;
}
+/*
+ * nft_set_elem_init - allocate a set element and fill in its extensions
+ * @set: set the element is created for; supplies elemsize, klen and dlen
+ * @tmpl: extension template; tmpl->len is added to the allocation size
+ * @key: key value, set->klen bytes are copied
+ * @data: data value, set->dlen bytes are copied; only read when the
+ * template contains NFT_SET_EXT_DATA
+ * @timeout: timeout stored in the element; passed to msecs_to_jiffies(),
+ * i.e. treated as milliseconds
+ * @gfp: allocation flags for kzalloc()
+ *
+ * Returns the zero-initialised element with key, optional data, optional
+ * expiration (jiffies + timeout) and optional timeout filled in, or NULL if
+ * the allocation fails.
+ */
+void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *data,
+ u64 timeout, gfp_t gfp)
+{
+ struct nft_set_ext *ext;
+ void *elem;
+
+ elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
+ if (elem == NULL)
+ return NULL;
+
+ ext = nft_set_elem_ext(set, elem);
+ nft_set_ext_init(ext, tmpl);
+
+ memcpy(nft_set_ext_key(ext), key, set->klen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ memcpy(nft_set_ext_data(ext), data, set->dlen);
+ /* NOTE(review): timeout is u64; confirm msecs_to_jiffies() does not
+ * truncate large values.
+ */
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
+ *nft_set_ext_expiration(ext) =
+ jiffies + msecs_to_jiffies(timeout);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+ *nft_set_ext_timeout(ext) = timeout;
+
+ return elem;
+}
+
+/*
+ * nft_set_elem_destroy - release a set element and what its extensions hold
+ * @set: set the element belongs to; supplies the data type
+ * @elem: element to free
+ *
+ * Uninitialises the key (always as NFT_DATA_VALUE), the data extension if
+ * present (using set->dtype) and destroys the expression extension if
+ * present, then frees the element itself.
+ */
+void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+ nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+ /* the expression is destroyed with a NULL context */
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+ nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+
+ kfree(elem);
+}
+EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc d1, d2;
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_ext *ext;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_userdata *udata;
+ struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u64 timeout;
+ u32 flags;
+ u8 ulen;
int err;
- if (set->size && set->nelems == set->size)
- return -ENFILE;
-
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -3133,38 +3320,59 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_KEY] == NULL)
return -EINVAL;
- elem.flags = 0;
+ nft_set_ext_prepare(&tmpl);
+
+ flags = 0;
if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
- elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
- if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+ flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
+ if (flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
+ if (flags != 0)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
}
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
- !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
if (nla[NFTA_SET_ELEM_DATA] != NULL &&
- elem.flags & NFT_SET_ELEM_INTERVAL_END)
+ flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_DATA] != NULL)
return -EINVAL;
}
- err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
+ timeout = 0;
+ if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+ } else if (set->flags & NFT_SET_TIMEOUT) {
+ timeout = set->timeout;
+ }
+
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
err = -EINVAL;
if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
goto err2;
- err = -EEXIST;
- if (set->ops->get(set, &elem) == 0)
- goto err2;
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len);
+ if (timeout > 0) {
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (timeout != set->timeout)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ }
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
- err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
+ err = nft_data_init(ctx, &data, sizeof(data), &d2,
+ nla[NFTA_SET_ELEM_DATA]);
if (err < 0)
goto err2;
@@ -3180,32 +3388,68 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
.chain = (struct nft_chain *)binding->chain,
};
- err = nft_validate_data_load(&bind_ctx, dreg,
- &elem.data, d2.type);
+ if (!(binding->flags & NFT_SET_MAP))
+ continue;
+
+ err = nft_validate_register_store(&bind_ctx, dreg,
+ &data,
+ d2.type, d2.len);
if (err < 0)
goto err3;
}
+
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len);
+ }
+
+ /* The full maximum length of userdata can exceed the maximum
+ * offset value (U8_MAX) for following extensions, therefore it
+ * must be the last extension added.
+ */
+ ulen = 0;
+ if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+ ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+ if (ulen > 0)
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ }
+
+ err = -ENOMEM;
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
+ timeout, GFP_KERNEL);
+ if (elem.priv == NULL)
+ goto err3;
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+ if (ulen > 0) {
+ udata = nft_set_ext_userdata(ext);
+ udata->len = ulen - 1;
+ nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
- goto err3;
+ goto err4;
+ ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
err = set->ops->insert(set, &elem);
if (err < 0)
- goto err4;
+ goto err5;
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
-err4:
+err5:
kfree(trans);
+err4:
+ kfree(elem.priv);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
- nft_data_uninit(&elem.data, d2.type);
+ nft_data_uninit(&data, d2.type);
err2:
- nft_data_uninit(&elem.key, d1.type);
+ nft_data_uninit(&elem.key.val, d1.type);
err1:
return err;
}
@@ -3241,11 +3485,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
+ return -ENFILE;
+
err = nft_add_set_elem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ atomic_dec(&set->nelems);
break;
-
- set->nelems++;
+ }
}
return err;
}
@@ -3268,7 +3516,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_KEY] == NULL)
goto err1;
- err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]);
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
@@ -3276,21 +3525,26 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
goto err2;
- err = set->ops->get(set, &elem);
- if (err < 0)
- goto err2;
-
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
+ elem.priv = set->ops->deactivate(set, &elem);
+ if (elem.priv == NULL) {
+ err = -ENOENT;
+ goto err3;
+ }
+
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+
+err3:
+ kfree(trans);
err2:
- nft_data_uninit(&elem.key, desc.type);
+ nft_data_uninit(&elem.key.val, desc.type);
err1:
return err;
}
@@ -3322,11 +3576,36 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
break;
- set->nelems--;
+ set->ndeact++;
}
return err;
}
+void nft_set_gc_batch_release(struct rcu_head *rcu)
+{
+ struct nft_set_gc_batch *gcb;
+ unsigned int i;
+
+ gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
+ for (i = 0; i < gcb->head.cnt; i++)
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ kfree(gcb);
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
+
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+ gfp_t gfp)
+{
+ struct nft_set_gc_batch *gcb;
+
+ gcb = kzalloc(sizeof(*gcb), gfp);
+ if (gcb == NULL)
+ return gcb;
+ gcb->head.set = set;
+ return gcb;
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
+
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
@@ -3526,6 +3805,10 @@ static void nf_tables_commit_release(struct nft_trans *trans)
case NFT_MSG_DELSET:
nft_set_destroy(nft_trans_set(trans));
break;
+ case NFT_MSG_DELSETELEM:
+ nft_set_elem_destroy(nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv);
+ break;
}
kfree(trans);
}
@@ -3540,7 +3823,7 @@ static int nf_tables_commit(struct sk_buff *skb)
while (++net->nft.base_seq == 0);
/* A new generation has just started */
- net->nft.gencursor = gencursor_next(net);
+ net->nft.gencursor = nft_gencursor_next(net);
/* Make sure all packets have left the previous generation before
* purging old rules.
@@ -3611,24 +3894,23 @@ static int nf_tables_commit(struct sk_buff *skb)
NFT_MSG_DELSET, GFP_KERNEL);
break;
case NFT_MSG_NEWSETELEM:
- nf_tables_setelem_notify(&trans->ctx,
- nft_trans_elem_set(trans),
- &nft_trans_elem(trans),
+ te = (struct nft_trans_elem *)trans->data;
+
+ te->set->ops->activate(te->set, &te->elem);
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
NFT_MSG_NEWSETELEM, 0);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
+
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_DELSETELEM, 0);
- te->set->ops->get(te->set, &te->elem);
- nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
- if (te->set->flags & NFT_SET_MAP &&
- !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(&te->elem.data, te->set->dtype);
te->set->ops->remove(te->set, &te->elem);
- nft_trans_destroy(trans);
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
break;
}
}
@@ -3660,6 +3942,10 @@ static void nf_tables_abort_release(struct nft_trans *trans)
case NFT_MSG_NEWSET:
nft_set_destroy(nft_trans_set(trans));
break;
+ case NFT_MSG_NEWSETELEM:
+ nft_set_elem_destroy(nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv);
+ break;
}
kfree(trans);
}
@@ -3728,18 +4014,17 @@ static int nf_tables_abort(struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- nft_trans_elem_set(trans)->nelems--;
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->get(te->set, &te->elem);
- nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
- if (te->set->flags & NFT_SET_MAP &&
- !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(&te->elem.data, te->set->dtype);
+
te->set->ops->remove(te->set, &te->elem);
- nft_trans_destroy(trans);
+ atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
- nft_trans_elem_set(trans)->nelems++;
+ te = (struct nft_trans_elem *)trans->data;
+
+ te->set->ops->activate(te->set, &te->elem);
+ te->set->ndeact--;
+
nft_trans_destroy(trans);
break;
}
@@ -3814,13 +4099,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
- if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_data *data;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
- switch (elem->data.verdict) {
+ data = nft_set_ext_data(ext);
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- return nf_tables_check_loops(ctx, elem->data.chain);
+ return nf_tables_check_loops(ctx, data->verdict.chain);
default:
return 0;
}
@@ -3853,10 +4143,11 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
if (data == NULL)
continue;
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- err = nf_tables_check_loops(ctx, data->chain);
+ err = nf_tables_check_loops(ctx,
+ data->verdict.chain);
if (err < 0)
return err;
default:
@@ -3871,7 +4162,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
continue;
list_for_each_entry(binding, &set->bindings, list) {
- if (binding->chain != chain)
+ if (!(binding->flags & NFT_SET_MAP) ||
+ binding->chain != chain)
continue;
iter.skip = 0;
@@ -3889,85 +4181,129 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
}
/**
- * nft_validate_input_register - validate an expressions' input register
+ * nft_parse_register - parse a register value from a netlink attribute
*
- * @reg: the register number
+ * @attr: netlink attribute
*
- * Validate that the input register is one of the general purpose
- * registers.
+ * Parse and translate a register value from a netlink attribute.
+ * Registers used to be 128 bit wide, these register numbers will be
+ * mapped to the corresponding 32 bit register numbers.
*/
-int nft_validate_input_register(enum nft_registers reg)
+unsigned int nft_parse_register(const struct nlattr *attr)
{
- if (reg <= NFT_REG_VERDICT)
- return -EINVAL;
- if (reg > NFT_REG_MAX)
- return -ERANGE;
- return 0;
+ unsigned int reg;
+
+ reg = ntohl(nla_get_be32(attr));
+ switch (reg) {
+ case NFT_REG_VERDICT...NFT_REG_4:
+ return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ default:
+ return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ }
}
-EXPORT_SYMBOL_GPL(nft_validate_input_register);
+EXPORT_SYMBOL_GPL(nft_parse_register);
/**
- * nft_validate_output_register - validate an expressions' output register
+ * nft_dump_register - dump a register value to a netlink attribute
+ *
+ * @skb: socket buffer
+ * @attr: attribute number
+ * @reg: register number
+ *
+ * Construct a netlink attribute containing the register number. For
+ * compatibility reasons, register numbers being a multiple of 4 are
+ * translated to the corresponding 128 bit register numbers.
+ */
+int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
+{
+ if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
+ reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
+ else
+ reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
+
+ return nla_put_be32(skb, attr, htonl(reg));
+}
+EXPORT_SYMBOL_GPL(nft_dump_register);
+
+/**
+ * nft_validate_register_load - validate a load from a register
*
* @reg: the register number
+ * @len: the length of the data
*
- * Validate that the output register is one of the general purpose
- * registers or the verdict register.
+ * Validate that the input register is one of the general purpose
+ * registers and that the length of the load is within the bounds.
*/
-int nft_validate_output_register(enum nft_registers reg)
+int nft_validate_register_load(enum nft_registers reg, unsigned int len)
{
- if (reg < NFT_REG_VERDICT)
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
- if (reg > NFT_REG_MAX)
+ if (len == 0)
+ return -EINVAL;
+ if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
return -ERANGE;
+
return 0;
}
-EXPORT_SYMBOL_GPL(nft_validate_output_register);
+EXPORT_SYMBOL_GPL(nft_validate_register_load);
/**
- * nft_validate_data_load - validate an expressions' data load
+ * nft_validate_register_store - validate an expression's register store
*
* @ctx: context of the expression performing the load
* @reg: the destination register number
* @data: the data to load
* @type: the data type
+ * @len: the length of the data
*
* Validate that a data load uses the appropriate data type for
- * the destination register. A value of NULL for the data means
- * that its runtime gathered data, which is always of type
- * NFT_DATA_VALUE.
+ * the destination register and the length is within the bounds.
+ * A value of NULL for the data means that its runtime gathered
+ * data.
*/
-int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type)
+int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
{
int err;
switch (reg) {
case NFT_REG_VERDICT:
- if (data == NULL || type != NFT_DATA_VERDICT)
+ if (type != NFT_DATA_VERDICT)
return -EINVAL;
- if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) {
- err = nf_tables_check_loops(ctx, data->chain);
+ if (data != NULL &&
+ (data->verdict.code == NFT_GOTO ||
+ data->verdict.code == NFT_JUMP)) {
+ err = nf_tables_check_loops(ctx, data->verdict.chain);
if (err < 0)
return err;
- if (ctx->chain->level + 1 > data->chain->level) {
+ if (ctx->chain->level + 1 >
+ data->verdict.chain->level) {
if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
return -EMLINK;
- data->chain->level = ctx->chain->level + 1;
+ data->verdict.chain->level = ctx->chain->level + 1;
}
}
return 0;
default:
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
+ return -EINVAL;
+ if (len == 0)
+ return -EINVAL;
+ if (reg * NFT_REG32_SIZE + len >
+ FIELD_SIZEOF(struct nft_regs, data))
+ return -ERANGE;
+
if (data != NULL && type != NFT_DATA_VALUE)
return -EINVAL;
return 0;
}
}
-EXPORT_SYMBOL_GPL(nft_validate_data_load);
+EXPORT_SYMBOL_GPL(nft_validate_register_store);
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
@@ -3988,11 +4324,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
- data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
- switch (data->verdict) {
+ switch (data->verdict.code) {
default:
- switch (data->verdict & NF_VERDICT_MASK) {
+ switch (data->verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
@@ -4018,7 +4354,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return -EOPNOTSUPP;
chain->use++;
- data->chain = chain;
+ data->verdict.chain = chain;
desc->len = sizeof(data);
break;
}
@@ -4029,10 +4365,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- data->chain->use--;
+ data->verdict.chain->use--;
break;
}
}
@@ -4045,13 +4381,14 @@ static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
if (!nest)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
goto nla_put_failure;
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+ if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
+ data->verdict.chain->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
@@ -4061,7 +4398,8 @@ nla_put_failure:
return -1;
}
-static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+static int nft_value_init(const struct nft_ctx *ctx,
+ struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla)
{
unsigned int len;
@@ -4069,10 +4407,10 @@ static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
len = nla_len(nla);
if (len == 0)
return -EINVAL;
- if (len > sizeof(data->data))
+ if (len > size)
return -EOVERFLOW;
- nla_memcpy(data->data, nla, sizeof(data->data));
+ nla_memcpy(data->data, nla, len);
desc->type = NFT_DATA_VALUE;
desc->len = len;
return 0;
@@ -4085,8 +4423,7 @@ static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
}
static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
- [NFTA_DATA_VALUE] = { .type = NLA_BINARY,
- .len = FIELD_SIZEOF(struct nft_data, data) },
+ [NFTA_DATA_VALUE] = { .type = NLA_BINARY },
[NFTA_DATA_VERDICT] = { .type = NLA_NESTED },
};
@@ -4095,6 +4432,7 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
*
* @ctx: context of the expression using the data
* @data: destination struct nft_data
+ * @size: maximum data length
* @desc: data description
* @nla: netlink attribute containing data
*
@@ -4104,7 +4442,8 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
* The caller can indicate that it only wants to accept data of type
* NFT_DATA_VALUE by passing NULL for the ctx argument.
*/
-int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+int nft_data_init(const struct nft_ctx *ctx,
+ struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla)
{
struct nlattr *tb[NFTA_DATA_MAX + 1];
@@ -4115,7 +4454,8 @@ int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
return err;
if (tb[NFTA_DATA_VALUE])
- return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+ return nft_value_init(ctx, data, size, desc,
+ tb[NFTA_DATA_VALUE]);
if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
return -EINVAL;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 2d298dccb6dd..f153b07073af 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -21,24 +22,66 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
+enum nft_trace {
+ NFT_TRACE_RULE,
+ NFT_TRACE_RETURN,
+ NFT_TRACE_POLICY,
+};
+
+static const char *const comments[] = {
+ [NFT_TRACE_RULE] = "rule",
+ [NFT_TRACE_RETURN] = "return",
+ [NFT_TRACE_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = LOGLEVEL_WARNING,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+static void __nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+ nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
+}
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ if (unlikely(pkt->skb->nf_trace))
+ __nft_trace_packet(pkt, chain, rulenum, type);
+}
+
static void nft_cmp_fast_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1])
+ struct nft_regs *regs)
{
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
u32 mask = nft_cmp_fast_mask(priv->len);
- if ((data[priv->sreg].data[0] & mask) == priv->data)
+ if ((regs->data[priv->sreg] & mask) == priv->data)
return;
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static bool nft_payload_fast_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
unsigned char *ptr;
if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
@@ -51,12 +94,13 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
return false;
+ *dest = 0;
if (priv->len == 2)
- *(u16 *)dest->data = *(u16 *)ptr;
+ *(u16 *)dest = *(u16 *)ptr;
else if (priv->len == 4)
- *(u32 *)dest->data = *(u32 *)ptr;
+ *(u32 *)dest = *(u32 *)ptr;
else
- *(u8 *)dest->data = *(u8 *)ptr;
+ *(u8 *)dest = *(u8 *)ptr;
return true;
}
@@ -66,62 +110,25 @@ struct nft_jumpstack {
int rulenum;
};
-enum nft_trace {
- NFT_TRACE_RULE,
- NFT_TRACE_RETURN,
- NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
- [NFT_TRACE_RULE] = "rule",
- [NFT_TRACE_RETURN] = "return",
- [NFT_TRACE_POLICY] = "policy",
-};
-
-static struct nf_loginfo trace_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 4,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static void nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
-{
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
-
- nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
- pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
- chain->table->name, chain->name, comments[type],
- rulenum);
-}
-
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
const struct nft_chain *chain = ops->priv, *basechain = chain;
+ const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
- struct nft_data data[NFT_REG_MAX + 1];
+ struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
struct nft_stats *stats;
int rulenum;
- /*
- * Cache cursor to avoid problems in case that the cursor is updated
- * while traversing the ruleset.
- */
- unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+ unsigned int gencursor = nft_genmask_cur(net);
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
next_rule:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs.verdict.code = NFT_CONTINUE;
list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
/* This rule is not active, skip. */
@@ -132,62 +139,52 @@ next_rule:
nft_rule_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
- nft_cmp_fast_eval(expr, data);
+ nft_cmp_fast_eval(expr, &regs);
else if (expr->ops != &nft_payload_fast_ops ||
- !nft_payload_fast_eval(expr, data, pkt))
- expr->ops->eval(expr, data, pkt);
+ !nft_payload_fast_eval(expr, &regs, pkt))
+ expr->ops->eval(expr, &regs, pkt);
- if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+ if (regs.verdict.code != NFT_CONTINUE)
break;
}
- switch (data[NFT_REG_VERDICT].verdict) {
+ switch (regs.verdict.code) {
case NFT_BREAK:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs.verdict.code = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
continue;
}
break;
}
- switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) {
+ switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
- return data[NFT_REG_VERDICT].verdict;
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ return regs.verdict.code;
}
- switch (data[NFT_REG_VERDICT].verdict) {
+ switch (regs.verdict.code) {
case NFT_JUMP:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rule = rule;
jumpstack[stackptr].rulenum = rulenum;
stackptr++;
- chain = data[NFT_REG_VERDICT].chain;
- goto do_chain;
+ /* fall through */
case NFT_GOTO:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
- chain = data[NFT_REG_VERDICT].chain;
+ chain = regs.verdict.chain;
goto do_chain;
- case NFT_RETURN:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
- break;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN)))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+ rulenum++;
+ /* fall through */
+ case NFT_RETURN:
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
break;
default:
WARN_ON(1);
@@ -201,8 +198,7 @@ next_rule:
goto next_rule;
}
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+ nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
@@ -244,8 +240,14 @@ int __init nf_tables_core_module_init(void)
if (err < 0)
goto err6;
+ err = nft_dynset_module_init();
+ if (err < 0)
+ goto err7;
+
return 0;
+err7:
+ nft_payload_module_exit();
err6:
nft_byteorder_module_exit();
err5:
@@ -262,6 +264,7 @@ err1:
void nf_tables_core_module_exit(void)
{
+ nft_dynset_module_exit();
nft_payload_module_exit();
nft_byteorder_module_exit();
nft_bitwise_module_exit();
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11d85b3813f2..3ad91266c821 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -23,6 +23,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
@@ -62,7 +63,7 @@ struct nfulnl_instance {
struct timer_list timer;
struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
- int peer_portid; /* PORTID of the peer process */
+ u32 peer_portid; /* PORTID of the peer process */
/* configurable parameters */
unsigned int flushtimeout; /* timeout until queue flush */
@@ -151,7 +152,7 @@ static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
instance_create(struct net *net, u_int16_t group_num,
- int portid, struct user_namespace *user_ns)
+ u32 portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
struct nfnl_log_net *log = nfnl_log_pernet(net);
@@ -448,14 +449,18 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physindev;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physindev &&
+
+ physindev = nf_bridge_get_physindev(skb);
+ if (physindev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
- htonl(skb->nf_bridge->physindev->ifindex)))
+ htonl(physindev->ifindex)))
goto nla_put_failure;
}
#endif
@@ -479,14 +484,18 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physoutdev;
+
/* Case 2: indev is a bridge group, we need to look
* for physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physoutdev &&
+
+ physoutdev = nf_bridge_get_physoutdev(skb);
+ if (physoutdev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
- htonl(skb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutdev->ifindex)))
goto nla_put_failure;
}
#endif
@@ -539,7 +548,7 @@ __build_packet_message(struct nfnl_log_net *log,
/* UID */
sk = skb->sk;
- if (sk && sk->sk_state != TCP_TIME_WAIT) {
+ if (sk && sk_fullsock(sk)) {
read_lock_bh(&sk->sk_callback_lock);
if (sk->sk_socket && sk->sk_socket->file) {
struct file *file = sk->sk_socket->file;
@@ -998,11 +1007,13 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfulnl_instance *inst = v;
- return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
- inst->group_num,
- inst->peer_portid, inst->qlen,
- inst->copy_mode, inst->copy_range,
- inst->flushtimeout, atomic_read(&inst->use));
+ seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
+ inst->group_num,
+ inst->peer_portid, inst->qlen,
+ inst->copy_mode, inst->copy_range,
+ inst->flushtimeout, atomic_read(&inst->use));
+
+ return 0;
}
static const struct seq_operations nful_seq_ops = {
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 0db8515e76da..0b98c7420239 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -25,6 +25,7 @@
#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
#include <linux/list.h>
@@ -54,7 +55,7 @@ struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
- int peer_portid;
+ u32 peer_portid;
unsigned int queue_maxlen;
unsigned int copy_range;
unsigned int queue_dropped;
@@ -109,8 +110,7 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
- int portid)
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
{
struct nfqnl_instance *inst;
unsigned int h;
@@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
{
const struct cred *cred;
- if (sk->sk_state == TCP_TIME_WAIT)
+ if (!sk_fullsock(sk))
return 0;
read_lock_bh(&sk->sk_callback_lock);
@@ -314,13 +314,13 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (entskb->tstamp.tv64)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
- if (entry->hook <= NF_INET_FORWARD ||
- (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
+ if (entry->state.hook <= NF_INET_FORWARD ||
+ (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
csum_verify = !skb_csum_unnecessary(entskb);
else
csum_verify = false;
- outdev = entry->outdev;
+ outdev = entry->state.out;
switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
case NFQNL_COPY_META:
@@ -368,23 +368,23 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
return NULL;
}
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = entry->pf;
+ nfmsg->nfgen_family = entry->state.pf;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(queue->queue_num);
nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
pmsg = nla_data(nla);
pmsg->hw_protocol = entskb->protocol;
- pmsg->hook = entry->hook;
+ pmsg->hook = entry->state.hook;
*packet_id_ptr = &pmsg->packet_id;
- indev = entry->indev;
+ indev = entry->state.in;
if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
goto nla_put_failure;
#else
- if (entry->pf == PF_BRIDGE) {
+ if (entry->state.pf == PF_BRIDGE) {
/* Case 1: indev is physical input device, we need to
* look for bridge group (when called from
* netfilter_bridge) */
@@ -396,14 +396,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physinif;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physindev &&
+
+ physinif = nf_bridge_get_physinif(entskb);
+ if (physinif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
- htonl(entskb->nf_bridge->physindev->ifindex)))
+ htonl(physinif)))
goto nla_put_failure;
}
#endif
@@ -414,7 +418,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
goto nla_put_failure;
#else
- if (entry->pf == PF_BRIDGE) {
+ if (entry->state.pf == PF_BRIDGE) {
/* Case 1: outdev is physical output device, we need to
* look for bridge group (when called from
* netfilter_bridge) */
@@ -426,14 +430,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physoutif;
+
/* Case 2: outdev is bridge group, we need to look for
* physical output device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physoutdev &&
+
+ physoutif = nf_bridge_get_physoutif(entskb);
+ if (physoutif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
- htonl(entskb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutif)))
goto nla_put_failure;
}
#endif
@@ -633,8 +641,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
struct nfqnl_instance *queue;
struct sk_buff *skb, *segs;
int err = -ENOBUFS;
- struct net *net = dev_net(entry->indev ?
- entry->indev : entry->outdev);
+ struct net *net = dev_net(entry->state.in ?
+ entry->state.in : entry->state.out);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
/* rcu_read_lock()ed by nf_hook_slow() */
@@ -647,7 +655,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
skb = entry->skb;
- switch (entry->pf) {
+ switch (entry->state.pf) {
case NFPROTO_IPV4:
skb->protocol = htons(ETH_P_IP);
break;
@@ -757,19 +765,20 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
- if (entry->indev)
- if (entry->indev->ifindex == ifindex)
+ if (entry->state.in)
+ if (entry->state.in->ifindex == ifindex)
return 1;
- if (entry->outdev)
- if (entry->outdev->ifindex == ifindex)
+ if (entry->state.out)
+ if (entry->state.out->ifindex == ifindex)
return 1;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- if (entry->skb->nf_bridge->physindev &&
- entry->skb->nf_bridge->physindev->ifindex == ifindex)
- return 1;
- if (entry->skb->nf_bridge->physoutdev &&
- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+ int physinif, physoutif;
+
+ physinif = nf_bridge_get_physinif(entry->skb);
+ physoutif = nf_bridge_get_physoutif(entry->skb);
+
+ if (physinif == ifindex || physoutif == ifindex)
return 1;
}
#endif
@@ -860,7 +869,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
};
static struct nfqnl_instance *
-verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
{
struct nfqnl_instance *queue;
@@ -1242,7 +1251,7 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfqnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
inst->queue_num,
inst->peer_portid, inst->queue_total,
inst->copy_mode, inst->copy_range,
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 4fb6ee2c1106..d71cc18fa35d 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -26,18 +26,16 @@ struct nft_bitwise {
};
static void nft_bitwise_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- const struct nft_data *src = &data[priv->sreg];
- struct nft_data *dst = &data[priv->dreg];
+ const u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
- dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
- priv->xor.data[i];
- }
+ for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+ dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
@@ -63,28 +61,27 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_XOR] == NULL)
return -EINVAL;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+ priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
+ err = nft_validate_register_load(priv->sreg, priv->len);
if (err < 0)
return err;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
- err = nft_validate_output_register(priv->dreg);
+ priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
if (err < 0)
return err;
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
- priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
- err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+ err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1,
+ tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
if (d1.len != priv->len)
return -EINVAL;
- err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+ err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
+ tb[NFTA_BITWISE_XOR]);
if (err < 0)
return err;
if (d2.len != priv->len)
@@ -97,9 +94,9 @@ static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index c39ed8d29df1..fde5145f2e36 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -26,16 +26,17 @@ struct nft_byteorder {
};
static void nft_byteorder_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+ u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
union { u32 u32; u16 u16; } *s, *d;
unsigned int i;
- s = (void *)src->data;
- d = (void *)dst->data;
+ s = (void *)src;
+ d = (void *)dst;
switch (priv->size) {
case 4:
@@ -87,19 +88,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
tb[NFTA_BYTEORDER_OP] == NULL)
return -EINVAL;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
- err = nft_validate_input_register(priv->sreg);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
@@ -109,10 +97,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
- if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
- return -EINVAL;
-
priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
switch (priv->size) {
case 2:
@@ -122,16 +106,24 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- return 0;
+ priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+ err = nft_validate_register_load(priv->sreg, priv->len);
+ if (err < 0)
+ return err;
+
+ priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index e2b3f51c81f1..e25b35d70e4d 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -25,13 +25,13 @@ struct nft_cmp_expr {
};
static void nft_cmp_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_cmp_expr *priv = nft_expr_priv(expr);
int d;
- d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+ d = memcmp(&regs->data[priv->sreg], &priv->data, priv->len);
switch (priv->op) {
case NFT_CMP_EQ:
if (d != 0)
@@ -59,7 +59,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
return;
mismatch:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
@@ -75,12 +75,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct nft_data_desc desc;
int err;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
- priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
-
- err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
+ tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
+ priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
+ err = nft_validate_register_load(priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
priv->len = desc.len;
return 0;
}
@@ -89,7 +93,7 @@ static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_cmp_expr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
goto nla_put_failure;
@@ -122,13 +126,18 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
u32 mask;
int err;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
-
- err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, sizeof(data), &desc,
+ tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
- desc.len *= BITS_PER_BYTE;
+ priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
+ err = nft_validate_register_load(priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ desc.len *= BITS_PER_BYTE;
mask = nft_cmp_fast_mask(desc.len);
+
priv->data = data.data[0] & mask;
priv->len = desc.len;
return 0;
@@ -139,7 +148,7 @@ static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
struct nft_data data;
- if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ)))
goto nla_put_failure;
@@ -167,7 +176,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
struct nft_data_desc desc;
struct nft_data data;
- enum nft_registers sreg;
enum nft_cmp_ops op;
int err;
@@ -176,11 +184,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
tb[NFTA_CMP_DATA] == NULL)
return ERR_PTR(-EINVAL);
- sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
- err = nft_validate_input_register(sreg);
- if (err < 0)
- return ERR_PTR(err);
-
op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
switch (op) {
case NFT_CMP_EQ:
@@ -194,7 +197,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
return ERR_PTR(-EINVAL);
}
- err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, sizeof(data), &desc,
+ tb[NFTA_CMP_DATA]);
if (err < 0)
return ERR_PTR(err);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 65f3e2b6be44..7f29cfc76349 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -20,6 +20,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -42,6 +43,7 @@ union nft_entry {
struct ipt_entry e4;
struct ip6t_entry e6;
struct ebt_entry ebt;
+ struct arpt_entry arp;
};
static inline void
@@ -53,7 +55,7 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
}
static void nft_target_eval_xt(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -70,16 +72,16 @@ static void nft_target_eval_xt(const struct nft_expr *expr,
switch (ret) {
case XT_CONTINUE:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs->verdict.code = NFT_CONTINUE;
break;
default:
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
break;
}
}
static void nft_target_eval_bridge(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -96,19 +98,19 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
switch (ret) {
case EBT_ACCEPT:
- data[NFT_REG_VERDICT].verdict = NF_ACCEPT;
+ regs->verdict.code = NF_ACCEPT;
break;
case EBT_DROP:
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
break;
case EBT_CONTINUE:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs->verdict.code = NFT_CONTINUE;
break;
case EBT_RETURN:
- data[NFT_REG_VERDICT].verdict = NFT_RETURN;
+ regs->verdict.code = NFT_RETURN;
break;
default:
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
break;
}
}
@@ -143,6 +145,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
entry->ebt.ethproto = (__force __be16)proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
+ case NFPROTO_ARP:
+ break;
}
par->entryinfo = entry;
par->target = target;
@@ -300,7 +304,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
}
static void nft_match_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -313,16 +317,16 @@ static void nft_match_eval(const struct nft_expr *expr,
ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
if (pkt->xt.hotdrop) {
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
return;
}
- switch(ret) {
- case true:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ switch (ret ? 1 : 0) {
+ case 1:
+ regs->verdict.code = NFT_CONTINUE;
break;
- case false:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ case 0:
+ regs->verdict.code = NFT_BREAK;
break;
}
}
@@ -357,6 +361,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
entry->ebt.ethproto = (__force __be16)proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
+ case NFPROTO_ARP:
+ break;
}
par->entryinfo = entry;
par->match = match;
@@ -543,6 +549,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
case NFPROTO_BRIDGE:
fmt = "ebt_%s";
break;
+ case NFPROTO_ARP:
+ fmt = "arpt_%s";
+ break;
default:
pr_err("nft_compat: unsupported protocol %d\n",
nfmsg->nfgen_family);
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index c89ee486ce54..17591239229f 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -24,7 +24,7 @@ struct nft_counter {
};
static void nft_counter_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_counter *priv = nft_expr_priv(expr);
@@ -92,6 +92,7 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
.ops = &nft_counter_ops,
.policy = nft_counter_policy,
.maxattr = NFTA_COUNTER_MAX,
+ .flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index cc5603016242..8cbca3432f90 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -31,11 +31,11 @@ struct nft_ct {
};
static void nft_ct_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
const struct nf_conn_help *help;
@@ -54,8 +54,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
state = NF_CT_STATE_UNTRACKED_BIT;
else
state = NF_CT_STATE_BIT(ctinfo);
- dest->data[0] = state;
+ *dest = state;
return;
+ default:
+ break;
}
if (ct == NULL)
@@ -63,26 +65,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_CT_DIRECTION:
- dest->data[0] = CTINFO2DIR(ctinfo);
+ *dest = CTINFO2DIR(ctinfo);
return;
case NFT_CT_STATUS:
- dest->data[0] = ct->status;
+ *dest = ct->status;
return;
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
- dest->data[0] = ct->mark;
+ *dest = ct->mark;
return;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
- dest->data[0] = ct->secmark;
+ *dest = ct->secmark;
return;
#endif
case NFT_CT_EXPIRATION:
diff = (long)jiffies - (long)ct->timeout.expires;
if (diff < 0)
diff = 0;
- dest->data[0] = jiffies_to_msecs(diff);
+ *dest = jiffies_to_msecs(diff);
return;
case NFT_CT_HELPER:
if (ct->master == NULL)
@@ -93,9 +95,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
helper = rcu_dereference(help->helper);
if (helper == NULL)
goto err;
- if (strlen(helper->name) >= sizeof(dest->data))
- goto err;
- strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+ strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
@@ -103,58 +103,60 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
unsigned int size;
if (!labels) {
- memset(dest->data, 0, sizeof(dest->data));
+ memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
return;
}
- BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
size = labels->words * sizeof(long);
-
- memcpy(dest->data, labels->bits, size);
- if (size < sizeof(dest->data))
- memset(((char *) dest->data) + size, 0,
- sizeof(dest->data) - size);
+ memcpy(dest, labels->bits, size);
+ if (size < NF_CT_LABELS_MAX_SIZE)
+ memset(((char *) dest) + size, 0,
+ NF_CT_LABELS_MAX_SIZE - size);
return;
}
#endif
+ default:
+ break;
}
tuple = &ct->tuplehash[priv->dir].tuple;
switch (priv->key) {
case NFT_CT_L3PROTOCOL:
- dest->data[0] = nf_ct_l3num(ct);
+ *dest = nf_ct_l3num(ct);
return;
case NFT_CT_SRC:
- memcpy(dest->data, tuple->src.u3.all,
+ memcpy(dest, tuple->src.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
case NFT_CT_DST:
- memcpy(dest->data, tuple->dst.u3.all,
+ memcpy(dest, tuple->dst.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
case NFT_CT_PROTOCOL:
- dest->data[0] = nf_ct_protonum(ct);
+ *dest = nf_ct_protonum(ct);
return;
case NFT_CT_PROTO_SRC:
- dest->data[0] = (__force __u16)tuple->src.u.all;
+ *dest = (__force __u16)tuple->src.u.all;
return;
case NFT_CT_PROTO_DST:
- dest->data[0] = (__force __u16)tuple->dst.u.all;
+ *dest = (__force __u16)tuple->dst.u.all;
return;
+ default:
+ break;
}
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static void nft_ct_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_ct *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
#ifdef CONFIG_NF_CONNTRACK_MARK
- u32 value = data[priv->sreg].data[0];
+ u32 value = regs->data[priv->sreg];
#endif
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
@@ -172,6 +174,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
}
break;
#endif
+ default:
+ break;
}
}
@@ -220,12 +224,17 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
- case NFT_CT_STATE:
case NFT_CT_DIRECTION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u8);
+ break;
+ case NFT_CT_STATE:
case NFT_CT_STATUS:
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
@@ -233,22 +242,54 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
#endif
+ case NFT_CT_EXPIRATION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = NF_CT_LABELS_MAX_SIZE;
+ break;
#endif
- case NFT_CT_EXPIRATION:
case NFT_CT_HELPER:
if (tb[NFTA_CT_DIRECTION] != NULL)
return -EINVAL;
+ len = NF_CT_HELPER_NAME_LEN;
break;
+
case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+ len = sizeof(u8);
+ break;
case NFT_CT_SRC:
case NFT_CT_DST:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_IPV4:
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ src.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ src.u3.ip6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ break;
case NFT_CT_PROTO_SRC:
case NFT_CT_PROTO_DST:
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
break;
default:
return -EOPNOTSUPP;
@@ -265,12 +306,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
}
}
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
if (err < 0)
return err;
@@ -286,20 +324,22 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
+ len = FIELD_SIZEOF(struct nf_conn, mark);
break;
#endif
default:
return -EOPNOTSUPP;
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
+ err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
return err;
@@ -320,7 +360,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
@@ -347,7 +387,7 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CT_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
new file mode 100644
index 000000000000..513a8ef60a59
--- /dev/null
+++ b/net/netfilter/nft_dynset.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+/*
+ * Per-instance private data of a "dynset" expression, filled in by
+ * nft_dynset_init().
+ */
+struct nft_dynset {
+ /* set that elements are added to or refreshed in */
+ struct nft_set *set;
+ /* extension layout handed to nft_set_elem_init() for new elements */
+ struct nft_set_ext_tmpl tmpl;
+ /* NFT_DYNSET_OP_ADD or NFT_DYNSET_OP_UPDATE */
+ enum nft_dynset_ops op:8;
+ /* register holding the element key */
+ enum nft_registers sreg_key:8;
+ /* register holding the element data; parsed only when NFTA_DYNSET_SREG_DATA is given */
+ enum nft_registers sreg_data:8;
+ /* timeout from NFTA_DYNSET_TIMEOUT; 0 selects set->timeout instead */
+ u64 timeout;
+ /* optional expression cloned into every newly created element */
+ struct nft_expr *expr;
+ /* binding record passed to nf_tables_bind_set()/nf_tables_unbind_set() */
+ struct nft_set_binding binding;
+};
+
+/*
+ * Element constructor handed to set->ops->update() by nft_dynset_eval().
+ *
+ * Builds a new element from the key/data registers of @regs using the
+ * template prepared at init time. For sets with a size limit a slot is
+ * reserved in set->nelems first and given back if the allocation fails.
+ *
+ * Returns the new element, or NULL if the set is full or the element
+ * could not be initialised.
+ */
+static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
+			    struct nft_regs *regs)
+{
+	const struct nft_dynset *dynset = nft_expr_priv(expr);
+	struct nft_set_ext *elem_ext;
+	void *new_elem;
+	u64 lifetime;
+
+	/* Size-limited set: claim a slot, or fail when the limit is reached. */
+	if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
+		return NULL;
+
+	/* A zero per-expression timeout falls back to the set's timeout. */
+	lifetime = dynset->timeout;
+	if (!lifetime)
+		lifetime = set->timeout;
+
+	new_elem = nft_set_elem_init(set, &dynset->tmpl,
+				     &regs->data[dynset->sreg_key],
+				     &regs->data[dynset->sreg_data],
+				     lifetime, GFP_ATOMIC);
+	if (!new_elem)
+		goto err_release_slot;
+
+	elem_ext = nft_set_elem_ext(set, new_elem);
+	if (dynset->expr)
+		nft_expr_clone(nft_set_ext_expr(elem_ext), dynset->expr);
+
+	return new_elem;
+
+err_release_slot:
+	/* Undo the reservation taken above. */
+	if (set->size)
+		atomic_dec(&set->nelems);
+	return NULL;
+}
+
+/*
+ * Evaluate a dynset expression for one packet.
+ *
+ * Asks the set backend to update the element keyed by the key register,
+ * passing nft_dynset_new() as the constructor. When that succeeds:
+ *  - for NFT_DYNSET_OP_UPDATE on an element carrying an expiration
+ *    extension, the expiration is reset to jiffies + timeout;
+ *  - the element's attached expression, if any, is evaluated.
+ * If the update fails, or there is neither an expiration to refresh nor
+ * an expression to run, the verdict is set to NFT_BREAK.
+ */
+static void nft_dynset_eval(const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_pktinfo *pkt)
+{
+	const struct nft_dynset *dynset = nft_expr_priv(expr);
+	struct nft_set *set = dynset->set;
+	const struct nft_set_ext *ext;
+	const struct nft_expr *elem_expr = NULL;
+	u64 lifetime;
+
+	if (!set->ops->update(set, &regs->data[dynset->sreg_key],
+			      nft_dynset_new, expr, regs, &ext))
+		goto out_break;
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+		elem_expr = nft_set_ext_expr(ext);
+
+	if (dynset->op == NFT_DYNSET_OP_UPDATE &&
+	    nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+		/* A zero per-expression timeout falls back to the set's. */
+		lifetime = dynset->timeout;
+		if (!lifetime)
+			lifetime = set->timeout;
+		*nft_set_ext_expiration(ext) = jiffies + lifetime;
+	} else if (!elem_expr) {
+		/* Nothing was refreshed and nothing can be evaluated. */
+		goto out_break;
+	}
+
+	if (elem_expr)
+		elem_expr->ops->eval(elem_expr, regs, pkt);
+	return;
+
+out_break:
+	regs->verdict.code = NFT_BREAK;
+}
+
+/* Netlink attribute types accepted for a dynset expression. */
+static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
+ [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
+ [NFTA_DYNSET_OP] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
+ [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED },
+};
+
+/*
+ * Parse and validate the netlink attributes of a dynset expression and
+ * bind the expression to its target set.
+ *
+ * NFTA_DYNSET_SET_NAME, NFTA_DYNSET_OP and NFTA_DYNSET_SREG_KEY are
+ * mandatory. The set is looked up by name first and, if that fails and
+ * NFTA_DYNSET_SET_ID is present, by id.
+ *
+ * Returns 0 on success or a negative errno:
+ *  -EINVAL      a mandatory attribute is missing; a timeout was given for
+ *               a set without NFT_SET_TIMEOUT; the data register and the
+ *               set's NFT_SET_MAP flag disagree; the expression attribute
+ *               and the set's NFT_SET_EVAL flag disagree
+ *  -EBUSY       the set is constant
+ *  -EOPNOTSUPP  unknown operation; UPDATE on a set without timeouts; data
+ *               for a verdict map; expression on a non-anonymous set;
+ *               expression that is not flagged NFT_EXPR_STATEFUL
+ * Errors from the set lookup, register validation, nft_expr_init() and
+ * nf_tables_bind_set() are passed through unchanged.
+ */
+static int nft_dynset_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+	struct nft_set *set;
+	u64 timeout;
+	int err;
+
+	if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
+	    tb[NFTA_DYNSET_OP] == NULL ||
+	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
+		return -EINVAL;
+
+	set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+	if (IS_ERR(set)) {
+		/* Fall back to a lookup by id when one was supplied. */
+		if (tb[NFTA_DYNSET_SET_ID])
+			set = nf_tables_set_lookup_byid(ctx->net,
+							tb[NFTA_DYNSET_SET_ID]);
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
+
+	/* Constant sets cannot be modified at evaluation time. */
+	if (set->flags & NFT_SET_CONSTANT)
+		return -EBUSY;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+	switch (priv->op) {
+	case NFT_DYNSET_OP_ADD:
+		break;
+	case NFT_DYNSET_OP_UPDATE:
+		/* UPDATE refreshes expirations, so the set needs timeouts. */
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EOPNOTSUPP;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	timeout = 0;
+	if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EINVAL;
+		/*
+		 * NOTE(review): this value is later added to jiffies in
+		 * nft_dynset_eval() without conversion - confirm the unit
+		 * userspace sends in NFTA_DYNSET_TIMEOUT.
+		 */
+		timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+	}
+
+	priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
+	err = nft_validate_register_load(priv->sreg_key, set->klen);
+	if (err < 0)
+		return err;
+
+	/* A data register is required for maps and rejected otherwise. */
+	if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+		if (!(set->flags & NFT_SET_MAP))
+			return -EINVAL;
+		if (set->dtype == NFT_DATA_VERDICT)
+			return -EOPNOTSUPP;
+
+		priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]);
+		err = nft_validate_register_load(priv->sreg_data, set->dlen);
+		if (err < 0)
+			return err;
+	} else if (set->flags & NFT_SET_MAP)
+		return -EINVAL;
+
+	/*
+	 * An expression is required for NFT_SET_EVAL sets and rejected
+	 * otherwise.
+	 *
+	 * NOTE(review): priv->expr is assigned only when NFTA_DYNSET_EXPR is
+	 * present; the NULL checks below presumably rely on the private area
+	 * being zero-initialised by the caller - confirm.
+	 */
+	if (tb[NFTA_DYNSET_EXPR] != NULL) {
+		if (!(set->flags & NFT_SET_EVAL))
+			return -EINVAL;
+		if (!(set->flags & NFT_SET_ANONYMOUS))
+			return -EOPNOTSUPP;
+
+		priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
+		if (IS_ERR(priv->expr))
+			return PTR_ERR(priv->expr);
+
+		err = -EOPNOTSUPP;
+		if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
+			goto err1;
+	} else if (set->flags & NFT_SET_EVAL)
+		return -EINVAL;
+
+	/* Describe the extensions every element created by this expression has. */
+	nft_set_ext_prepare(&priv->tmpl);
+	nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
+	if (set->flags & NFT_SET_MAP)
+		nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+	if (priv->expr != NULL)
+		nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR,
+				       priv->expr->ops->size);
+	if (set->flags & NFT_SET_TIMEOUT) {
+		if (timeout || set->timeout)
+			nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+	}
+
+	priv->timeout = timeout;
+
+	err = nf_tables_bind_set(ctx, set, &priv->binding);
+	if (err < 0)
+		goto err1;
+
+	priv->set = set;
+	return 0;
+
+err1:
+	/* Release the expression created above, if any. */
+	if (priv->expr != NULL)
+		nft_expr_destroy(ctx, priv->expr);
+	return err;
+}
+
+/*
+ * Tear down a dynset expression: drop its binding to the set, then
+ * destroy the attached expression when one exists.
+ */
+static void nft_dynset_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	struct nft_dynset *dynset = nft_expr_priv(expr);
+	struct nft_expr *attached;
+
+	nf_tables_unbind_set(ctx, dynset->set, &dynset->binding);
+
+	attached = dynset->expr;
+	if (attached)
+		nft_expr_destroy(ctx, attached);
+}
+
+/*
+ * Serialise a dynset expression into @skb as netlink attributes.
+ *
+ * Emits, in order: the key register, the data register (only when the
+ * set is a map), the operation, the set name, the timeout, and the
+ * attached expression when there is one.
+ *
+ * Returns 0 on success, -1 as soon as any attribute cannot be added.
+ */
+static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_dynset *dynset = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, dynset->sreg_key))
+		return -1;
+
+	if (dynset->set->flags & NFT_SET_MAP) {
+		if (nft_dump_register(skb, NFTA_DYNSET_SREG_DATA,
+				      dynset->sreg_data))
+			return -1;
+	}
+
+	if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(dynset->op)))
+		return -1;
+
+	if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, dynset->set->name))
+		return -1;
+
+	if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT,
+			 cpu_to_be64(dynset->timeout)))
+		return -1;
+
+	if (dynset->expr) {
+		if (nft_expr_dump(skb, NFTA_DYNSET_EXPR, dynset->expr))
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Forward declaration: nft_dynset_ops and nft_dynset_type point at each other. */
+static struct nft_expr_type nft_dynset_type;
+/* Callback table for the dynset expression. */
+static const struct nft_expr_ops nft_dynset_ops = {
+ .type = &nft_dynset_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
+ .eval = nft_dynset_eval,
+ .init = nft_dynset_init,
+ .destroy = nft_dynset_destroy,
+ .dump = nft_dynset_dump,
+};
+
+/* Expression type descriptor registered under the name "dynset". */
+static struct nft_expr_type nft_dynset_type __read_mostly = {
+ .name = "dynset",
+ .ops = &nft_dynset_ops,
+ .policy = nft_dynset_policy,
+ .maxattr = NFTA_DYNSET_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Register the dynset expression type; returns nft_register_expr()'s result. */
+int __init nft_dynset_module_init(void)
+{
+ return nft_register_expr(&nft_dynset_type);
+}
+
+/* Unregister the dynset expression type. */
+void nft_dynset_module_exit(void)
+{
+ nft_unregister_expr(&nft_dynset_type);
+}
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
deleted file mode 100644
index b6eed4d5a096..000000000000
--- a/net/netfilter/nft_expr_template.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-
-struct nft_template {
-
-};
-
-static void nft_template_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
-}
-
-static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
- [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 },
-};
-
-static int nft_template_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
- return 0;
-}
-
-static void nft_template_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
-}
-
-static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
-{
- const struct nft_template *priv = nft_expr_priv(expr);
-
- NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field);
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
-static struct nft_expr_type nft_template_type;
-static const struct nft_expr_ops nft_template_ops = {
- .type = &nft_template_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_template)),
- .eval = nft_template_eval,
- .init = nft_template_init,
- .destroy = nft_template_destroy,
- .dump = nft_template_dump,
-};
-
-static struct nft_expr_type nft_template_type __read_mostly = {
- .name = "template",
- .ops = &nft_template_ops,
- .policy = nft_template_policy,
- .maxattr = NFTA_TEMPLATE_MAX,
- .owner = THIS_MODULE,
-};
-
-static int __init nft_template_module_init(void)
-{
- return nft_register_expr(&nft_template_type);
-}
-
-static void __exit nft_template_module_exit(void)
-{
- nft_unregister_expr(&nft_template_type);
-}
-
-module_init(nft_template_module_init);
-module_exit(nft_template_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 55c939f5371f..ba7aed13e174 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -26,11 +26,11 @@ struct nft_exthdr {
};
static void nft_exthdr_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
unsigned int offset = 0;
int err;
@@ -39,11 +39,14 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
goto err;
offset += priv->offset;
- if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+ /* Zero the tail of a partial word only; avoids an OOB store. */
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
@@ -58,7 +59,6 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- int err;
if (tb[NFTA_EXTHDR_DREG] == NULL ||
tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -69,22 +69,17 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
- if (priv->len == 0 ||
- priv->len > FIELD_SIZEOF(struct nft_data, data))
- return -EINVAL;
+ priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_exthdr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 37c15e674884..3f9d45d3d9b7 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,6 +15,7 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
+#include <linux/workqueue.h>
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -23,119 +24,175 @@
/* We target a hash table size of 4, element hint is 75% of final size */
#define NFT_HASH_ELEMENT_HINT 3
+struct nft_hash {
+ struct rhashtable ht;
+ struct delayed_work gc_work;
+};
+
struct nft_hash_elem {
struct rhash_head node;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_set_ext ext;
+};
+
+struct nft_hash_cmp_arg {
+ const struct nft_set *set;
+ const u32 *key;
+ u8 genmask;
};
-static bool nft_hash_lookup(const struct nft_set *set,
- const struct nft_data *key,
- struct nft_data *data)
+static const struct rhashtable_params nft_hash_params;
+
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_cmp_arg *arg = data;
+
+ return jhash(arg->key, len, seed);
+}
+
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
{
- struct rhashtable *priv = nft_set_priv(set);
+ const struct nft_hash_elem *he = data;
+
+ return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_hash_cmp_arg *x = arg->key;
+ const struct nft_hash_elem *he = ptr;
+
+ if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
+ if (nft_set_elem_expired(&he->ext))
+ return 1;
+ if (!nft_set_elem_active(&he->ext, x->genmask))
+ return 1;
+ return 0;
+}
+
+static bool nft_hash_lookup(const struct nft_set *set, const u32 *key,
+ const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(read_pnet(&set->pnet)),
+ .set = set,
+ .key = key,
+ };
- he = rhashtable_lookup(priv, key);
- if (he && set->flags & NFT_SET_MAP)
- nft_data_copy(data, he->data);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ *ext = &he->ext;
return !!he;
}
-static int nft_hash_insert(const struct nft_set *set,
- const struct nft_set_elem *elem)
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_regs *regs),
+ const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_set_ext **ext)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
- unsigned int size;
-
- if (elem->flags != 0)
- return -EINVAL;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
- size = sizeof(*he);
- if (set->flags & NFT_SET_MAP)
- size += sizeof(he->data[0]);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ goto out;
- he = kzalloc(size, GFP_KERNEL);
+ he = new(set, expr, regs);
if (he == NULL)
- return -ENOMEM;
-
- nft_data_copy(&he->key, &elem->key);
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(he->data, &elem->data);
-
- rhashtable_insert(priv, &he->node);
+ goto err1;
+ if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params))
+ goto err2;
+out:
+ *ext = &he->ext;
+ return true;
- return 0;
+err2:
+ nft_set_elem_destroy(set, he);
+err1:
+ return false;
}
-static void nft_hash_elem_destroy(const struct nft_set *set,
- struct nft_hash_elem *he)
+static int nft_hash_insert(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- nft_data_uninit(&he->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(he->data, set->dtype);
- kfree(he);
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
}
-static void nft_hash_remove(const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_hash_activate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
- rhashtable_remove(priv, elem->cookie);
- synchronize_rcu();
- kfree(elem->cookie);
+ nft_set_elem_change_active(set, &he->ext);
+ nft_set_elem_clear_busy(&he->ext);
}
-struct nft_compare_arg {
- const struct nft_set *set;
- struct nft_set_elem *elem;
-};
-
-static bool nft_hash_compare(void *ptr, void *arg)
+static void *nft_hash_deactivate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct nft_hash_elem *he = ptr;
- struct nft_compare_arg *x = arg;
-
- if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) {
- x->elem->cookie = he;
- x->elem->flags = 0;
- if (x->set->flags & NFT_SET_MAP)
- nft_data_copy(&x->elem->data, he->data);
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .set = set,
+ .key = elem->key.val.data,
+ };
- return true;
+ rcu_read_lock();
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL) {
+ if (!nft_set_elem_mark_busy(&he->ext))
+ nft_set_elem_change_active(set, &he->ext);
+ else
+ he = NULL;
}
+ rcu_read_unlock();
- return false;
+ return he;
}
-static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void nft_hash_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
- struct nft_compare_arg arg = {
- .set = set,
- .elem = elem,
- };
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
- if (rhashtable_lookup_compare(priv, &elem->key,
- &nft_hash_compare, &arg))
- return 0;
-
- return -ENOENT;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
}
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
- struct rhashtable *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
struct rhashtable_iter hti;
struct nft_set_elem elem;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int err;
- err = rhashtable_walk_init(priv, &hti);
+ err = rhashtable_walk_init(&priv->ht, &hti);
iter->err = err;
if (err)
return;
@@ -159,11 +216,12 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
+ if (nft_set_elem_expired(&he->ext))
+ goto cont;
+ if (!nft_set_elem_active(&he->ext, genmask))
+ goto cont;
- memcpy(&elem.key, &he->key, sizeof(elem.key));
- if (set->flags & NFT_SET_MAP)
- memcpy(&elem.data, he->data, sizeof(elem.data));
- elem.flags = 0;
+ elem.priv = he;
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0)
@@ -178,47 +236,102 @@ out:
rhashtable_walk_exit(&hti);
}
+static void nft_hash_gc(struct work_struct *work)
+{
+ struct nft_set *set;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
+ int err;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = nft_set_container_of(priv);
+
+ err = rhashtable_walk_init(&priv->ht, &hti);
+ if (err)
+ goto schedule;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN)
+ goto out;
+ continue;
+ }
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ if (nft_set_elem_mark_busy(&he->ext))
+ continue;
+
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb == NULL)
+ goto out;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, he);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ nft_set_gc_batch_complete(gcb);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
- return sizeof(struct rhashtable);
+ return sizeof(struct nft_hash);
}
+static const struct rhashtable_params nft_hash_params = {
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .hashfn = nft_hash_key,
+ .obj_hashfn = nft_hash_obj,
+ .obj_cmpfn = nft_hash_cmp,
+ .automatic_shrinking = true,
+};
+
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
- struct rhashtable *priv = nft_set_priv(set);
- struct rhashtable_params params = {
- .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
- .head_offset = offsetof(struct nft_hash_elem, node),
- .key_offset = offsetof(struct nft_hash_elem, key),
- .key_len = set->klen,
- .hashfn = jhash,
- };
+ struct nft_hash *priv = nft_set_priv(set);
+ struct rhashtable_params params = nft_hash_params;
+ int err;
- return rhashtable_init(priv, &params);
+ params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+ params.key_len = set->klen;
+
+ err = rhashtable_init(&priv->ht, &params);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_elem_destroy(void *ptr, void *arg)
{
- struct rhashtable *priv = nft_set_priv(set);
- const struct bucket_table *tbl;
- struct nft_hash_elem *he;
- struct rhash_head *pos, *next;
- unsigned int i;
-
- /* Stop an eventual async resizing */
- priv->being_destroyed = true;
- mutex_lock(&priv->mutex);
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+}
- tbl = rht_dereference(priv->tbl, priv);
- for (i = 0; i < tbl->size; i++) {
- rht_for_each_entry_safe(he, pos, next, tbl, i, node)
- nft_hash_elem_destroy(set, he);
- }
- mutex_unlock(&priv->mutex);
+static void nft_hash_destroy(const struct nft_set *set)
+{
+ struct nft_hash *priv = nft_set_priv(set);
- rhashtable_destroy(priv);
+ cancel_delayed_work_sync(&priv->gc_work);
+ rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+ (void *)set);
}
static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -227,11 +340,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
unsigned int esize;
esize = sizeof(struct nft_hash_elem);
- if (features & NFT_SET_MAP)
- esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
-
if (desc->size) {
- est->size = sizeof(struct rhashtable) +
+ est->size = sizeof(struct nft_hash) +
roundup_pow_of_two(desc->size * 4 / 3) *
sizeof(struct nft_hash_elem *) +
desc->size * esize;
@@ -251,15 +361,18 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
- .get = nft_hash_get,
.insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
+ .update = nft_hash_update,
.walk = nft_hash_walk,
- .features = NFT_SET_MAP,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 810385eb7249..db3b746858e3 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -24,12 +24,12 @@ struct nft_immediate_expr {
};
static void nft_immediate_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
- nft_data_copy(&data[priv->dreg], &priv->data);
+ nft_data_copy(&regs->data[priv->dreg], &priv->data, priv->dlen);
}
static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
@@ -49,17 +49,15 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
tb[NFTA_IMMEDIATE_DATA] == NULL)
return -EINVAL;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+ err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc,
+ tb[NFTA_IMMEDIATE_DATA]);
if (err < 0)
return err;
priv->dlen = desc.len;
- err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+ priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, &priv->data,
+ desc.type, desc.len);
if (err < 0)
goto err1;
@@ -81,7 +79,7 @@ static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_IMMEDIATE_DREG, priv->dreg))
goto nla_put_failure;
return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 85da5bd02f64..435c1ccd6c0e 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -27,7 +27,7 @@ struct nft_limit {
};
static void nft_limit_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
@@ -45,7 +45,7 @@ static void nft_limit_eval(const struct nft_expr *expr,
}
spin_unlock_bh(&limit_lock);
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
@@ -98,6 +98,7 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
.ops = &nft_limit_ops,
.policy = nft_limit_policy,
.maxattr = NFTA_LIMIT_MAX,
+ .flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index bde05f28cf14..a13d6a386d63 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -27,7 +27,7 @@ struct nft_log {
};
static void nft_log_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_log *priv = nft_expr_priv(expr);
@@ -78,7 +78,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
li->u.log.level =
ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
} else {
- li->u.log.level = 4;
+ li->u.log.level = LOGLEVEL_WARNING;
}
if (tb[NFTA_LOG_FLAGS] != NULL) {
li->u.log.logflags =
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 9615b8b9fb37..b3c31ef8015d 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -26,15 +26,20 @@ struct nft_lookup {
};
static void nft_lookup_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext;
- if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+ if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&regs->data[priv->dreg],
+ nft_set_ext_data(ext), set->dlen);
return;
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ }
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
@@ -66,8 +71,11 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ if (set->flags & NFT_SET_EVAL)
+ return -EOPNOTSUPP;
+
+ priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
+ err = nft_validate_register_load(priv->sreg, set->klen);
if (err < 0)
return err;
@@ -75,19 +83,16 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG]));
- err = nft_validate_output_register(priv->dreg);
+ priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ set->dtype, set->dlen);
if (err < 0)
return err;
-
- if (priv->dreg == NFT_REG_VERDICT) {
- if (set->dtype != NFT_DATA_VERDICT)
- return -EINVAL;
- } else if (set->dtype == NFT_DATA_VERDICT)
- return -EINVAL;
} else if (set->flags & NFT_SET_MAP)
return -EINVAL;
+ priv->binding.flags = set->flags & NFT_SET_MAP;
+
err = nf_tables_bind_set(ctx, set, &priv->binding);
if (err < 0)
return err;
@@ -110,10 +115,10 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
goto nla_put_failure;
if (priv->set->flags & NFT_SET_MAP)
- if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
return 0;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e99911eda915..52561e1c31e2 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -25,65 +25,68 @@
#include <net/netfilter/nft_meta.h>
void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
const struct net_device *in = pkt->in, *out = pkt->out;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
switch (priv->key) {
case NFT_META_LEN:
- dest->data[0] = skb->len;
+ *dest = skb->len;
break;
case NFT_META_PROTOCOL:
- *(__be16 *)dest->data = skb->protocol;
+ *dest = 0;
+ *(__be16 *)dest = skb->protocol;
break;
case NFT_META_NFPROTO:
- dest->data[0] = pkt->ops->pf;
+ *dest = pkt->ops->pf;
break;
case NFT_META_L4PROTO:
- dest->data[0] = pkt->tprot;
+ *dest = pkt->tprot;
break;
case NFT_META_PRIORITY:
- dest->data[0] = skb->priority;
+ *dest = skb->priority;
break;
case NFT_META_MARK:
- dest->data[0] = skb->mark;
+ *dest = skb->mark;
break;
case NFT_META_IIF:
if (in == NULL)
goto err;
- dest->data[0] = in->ifindex;
+ *dest = in->ifindex;
break;
case NFT_META_OIF:
if (out == NULL)
goto err;
- dest->data[0] = out->ifindex;
+ *dest = out->ifindex;
break;
case NFT_META_IIFNAME:
if (in == NULL)
goto err;
- strncpy((char *)dest->data, in->name, sizeof(dest->data));
+ strncpy((char *)dest, in->name, IFNAMSIZ);
break;
case NFT_META_OIFNAME:
if (out == NULL)
goto err;
- strncpy((char *)dest->data, out->name, sizeof(dest->data));
+ strncpy((char *)dest, out->name, IFNAMSIZ);
break;
case NFT_META_IIFTYPE:
if (in == NULL)
goto err;
- *(u16 *)dest->data = in->type;
+ *dest = 0;
+ *(u16 *)dest = in->type;
break;
case NFT_META_OIFTYPE:
if (out == NULL)
goto err;
- *(u16 *)dest->data = out->type;
+ *dest = 0;
+ *(u16 *)dest = out->type;
break;
case NFT_META_SKUID:
- if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
goto err;
read_lock_bh(&skb->sk->sk_callback_lock);
@@ -93,13 +96,12 @@ void nft_meta_get_eval(const struct nft_expr *expr,
goto err;
}
- dest->data[0] =
- from_kuid_munged(&init_user_ns,
+ *dest = from_kuid_munged(&init_user_ns,
skb->sk->sk_socket->file->f_cred->fsuid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
case NFT_META_SKGID:
- if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
goto err;
read_lock_bh(&skb->sk->sk_callback_lock);
@@ -108,8 +110,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
read_unlock_bh(&skb->sk->sk_callback_lock);
goto err;
}
- dest->data[0] =
- from_kgid_munged(&init_user_ns,
+ *dest = from_kgid_munged(&init_user_ns,
skb->sk->sk_socket->file->f_cred->fsgid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
@@ -119,33 +120,33 @@ void nft_meta_get_eval(const struct nft_expr *expr,
if (dst == NULL)
goto err;
- dest->data[0] = dst->tclassid;
+ *dest = dst->tclassid;
break;
}
#endif
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
- dest->data[0] = skb->secmark;
+ *dest = skb->secmark;
break;
#endif
case NFT_META_PKTTYPE:
if (skb->pkt_type != PACKET_LOOPBACK) {
- dest->data[0] = skb->pkt_type;
+ *dest = skb->pkt_type;
break;
}
switch (pkt->ops->pf) {
case NFPROTO_IPV4:
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
- dest->data[0] = PACKET_MULTICAST;
+ *dest = PACKET_MULTICAST;
else
- dest->data[0] = PACKET_BROADCAST;
+ *dest = PACKET_BROADCAST;
break;
case NFPROTO_IPV6:
if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
- dest->data[0] = PACKET_MULTICAST;
+ *dest = PACKET_MULTICAST;
else
- dest->data[0] = PACKET_BROADCAST;
+ *dest = PACKET_BROADCAST;
break;
default:
WARN_ON(1);
@@ -153,23 +154,22 @@ void nft_meta_get_eval(const struct nft_expr *expr,
}
break;
case NFT_META_CPU:
- dest->data[0] = smp_processor_id();
+ *dest = raw_smp_processor_id();
break;
case NFT_META_IIFGROUP:
if (in == NULL)
goto err;
- dest->data[0] = in->group;
+ *dest = in->group;
break;
case NFT_META_OIFGROUP:
if (out == NULL)
goto err;
- dest->data[0] = out->group;
+ *dest = out->group;
break;
case NFT_META_CGROUP:
- if (skb->sk == NULL)
- break;
-
- dest->data[0] = skb->sk->sk_classid;
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ goto err;
+ *dest = skb->sk->sk_classid;
break;
default:
WARN_ON(1);
@@ -178,17 +178,17 @@ void nft_meta_get_eval(const struct nft_expr *expr,
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
- u32 value = data[meta->sreg].data[0];
+ u32 value = regs->data[meta->sreg];
switch (meta->key) {
case NFT_META_MARK:
@@ -218,22 +218,22 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
- int err;
+ unsigned int len;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
- case NFT_META_LEN:
case NFT_META_PROTOCOL:
+ case NFT_META_IIFTYPE:
+ case NFT_META_OIFTYPE:
+ len = sizeof(u16);
+ break;
case NFT_META_NFPROTO:
case NFT_META_L4PROTO:
+ case NFT_META_LEN:
case NFT_META_PRIORITY:
case NFT_META_MARK:
case NFT_META_IIF:
case NFT_META_OIF:
- case NFT_META_IIFNAME:
- case NFT_META_OIFNAME:
- case NFT_META_IIFTYPE:
- case NFT_META_OIFTYPE:
case NFT_META_SKUID:
case NFT_META_SKGID:
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -247,21 +247,19 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_IIFGROUP:
case NFT_META_OIFGROUP:
case NFT_META_CGROUP:
+ len = sizeof(u32);
+ break;
+ case NFT_META_IIFNAME:
+ case NFT_META_OIFNAME:
+ len = IFNAMSIZ;
break;
default:
return -EOPNOTSUPP;
}
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
- return 0;
+ priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
@@ -270,20 +268,24 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
case NFT_META_MARK:
case NFT_META_PRIORITY:
+ len = sizeof(u32);
+ break;
case NFT_META_NFTRACE:
+ len = sizeof(u8);
break;
default:
return -EOPNOTSUPP;
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
+ err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
return err;
@@ -298,7 +300,7 @@ int nft_meta_get_dump(struct sk_buff *skb,
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg))
goto nla_put_failure;
return 0;
@@ -314,7 +316,7 @@ int nft_meta_set_dump(struct sk_buff *skb,
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
goto nla_put_failure;
return 0;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index a0837c6c9283..ee2d71753746 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -37,7 +37,7 @@ struct nft_nat {
};
static void nft_nat_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_nat *priv = nft_expr_priv(expr);
@@ -49,33 +49,32 @@ static void nft_nat_eval(const struct nft_expr *expr,
if (priv->sreg_addr_min) {
if (priv->family == AF_INET) {
range.min_addr.ip = (__force __be32)
- data[priv->sreg_addr_min].data[0];
+ regs->data[priv->sreg_addr_min];
range.max_addr.ip = (__force __be32)
- data[priv->sreg_addr_max].data[0];
+ regs->data[priv->sreg_addr_max];
} else {
memcpy(range.min_addr.ip6,
- data[priv->sreg_addr_min].data,
- sizeof(struct nft_data));
+ &regs->data[priv->sreg_addr_min],
+ sizeof(range.min_addr.ip6));
memcpy(range.max_addr.ip6,
- data[priv->sreg_addr_max].data,
- sizeof(struct nft_data));
+ &regs->data[priv->sreg_addr_max],
+ sizeof(range.max_addr.ip6));
}
range.flags |= NF_NAT_RANGE_MAP_IPS;
}
if (priv->sreg_proto_min) {
range.min_proto.all =
- *(__be16 *)&data[priv->sreg_proto_min].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_min];
range.max_proto.all =
- *(__be16 *)&data[priv->sreg_proto_max].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_max];
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
range.flags |= priv->flags;
- data[NFT_REG_VERDICT].verdict =
- nf_nat_setup_info(ct, &range, priv->type);
+ regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type);
}
static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
@@ -119,6 +118,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_nat *priv = nft_expr_priv(expr);
+ unsigned int alen, plen;
u32 family;
int err;
@@ -146,25 +146,34 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (family != AF_INET && family != AF_INET6)
- return -EAFNOSUPPORT;
if (family != ctx->afi->family)
return -EOPNOTSUPP;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip);
+ break;
+ case NFPROTO_IPV6:
+ alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
priv->sreg_addr_min =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN]));
-
- err = nft_validate_input_register(priv->sreg_addr_min);
+ nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]);
+ err = nft_validate_register_load(priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
priv->sreg_addr_max =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX]));
+ nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]);
- err = nft_validate_input_register(priv->sreg_addr_max);
+ err = nft_validate_register_load(priv->sreg_addr_max,
+ alen);
if (err < 0)
return err;
} else {
@@ -172,19 +181,22 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
}
}
+ /* Port registers hold 16 bits: size by min_proto, not min_addr. */
+ plen = FIELD_SIZEOF(struct nf_nat_range, min_proto.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
priv->sreg_proto_min =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN]));
+ nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
- err = nft_validate_input_register(priv->sreg_proto_min);
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
priv->sreg_proto_max =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX]));
+ nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]);
- err = nft_validate_input_register(priv->sreg_proto_max);
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
@@ -220,18 +231,18 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (priv->sreg_addr_min) {
- if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN,
- htonl(priv->sreg_addr_min)) ||
- nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX,
- htonl(priv->sreg_addr_max)))
+ if (nft_dump_register(skb, NFTA_NAT_REG_ADDR_MIN,
+ priv->sreg_addr_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_ADDR_MAX,
+ priv->sreg_addr_max))
goto nla_put_failure;
}
if (priv->sreg_proto_min) {
- if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
- htonl(priv->sreg_proto_min)) ||
- nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
- htonl(priv->sreg_proto_max)))
+ if (nft_dump_register(skb, NFTA_NAT_REG_PROTO_MIN,
+ priv->sreg_proto_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_PROTO_MAX,
+ priv->sreg_proto_max))
goto nla_put_failure;
}
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 85daa84bfdfe..94fb3b27a2c5 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -18,12 +18,12 @@
#include <net/netfilter/nf_tables.h>
static void nft_payload_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
int offset;
switch (priv->base) {
@@ -43,11 +43,12 @@ static void nft_payload_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
@@ -62,24 +63,21 @@ static int nft_payload_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload *priv = nft_expr_priv(expr);
- int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]);
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_payload *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
+ if (nft_dump_register(skb, NFTA_PAYLOAD_DREG, priv->dreg) ||
nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
@@ -131,9 +129,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
}
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
- return ERR_PTR(-EINVAL);
+ len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index e8ae2f6bf232..96805d21d618 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -28,7 +28,7 @@ struct nft_queue {
};
static void nft_queue_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_queue *priv = nft_expr_priv(expr);
@@ -51,7 +51,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
}
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 46214f245665..1c30f41cff5b 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -26,25 +26,25 @@ struct nft_rbtree {
struct nft_rbtree_elem {
struct rb_node node;
- u16 flags;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_set_ext ext;
};
-static bool nft_rbtree_lookup(const struct nft_set *set,
- const struct nft_data *key,
- struct nft_data *data)
+
+static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
+ const struct nft_set_ext **ext)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
- const struct rb_node *parent = priv->root.rb_node;
+ const struct rb_node *parent;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
spin_lock_bh(&nft_rbtree_lock);
+ parent = priv->root.rb_node;
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = parent->rb_left;
interval = rbe;
@@ -52,12 +52,17 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
parent = parent->rb_right;
else {
found:
- if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(&rbe->ext) &
+ NFT_SET_ELEM_INTERVAL_END)
goto out;
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(data, rbe->data);
-
spin_unlock_bh(&nft_rbtree_lock);
+
+ *ext = &rbe->ext;
return true;
}
}
@@ -71,23 +76,13 @@ out:
return false;
}
-static void nft_rbtree_elem_destroy(const struct nft_set *set,
- struct nft_rbtree_elem *rbe)
-{
- nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(rbe->data, set->dtype);
-
- kfree(rbe);
-}
-
static int __nft_rbtree_insert(const struct nft_set *set,
struct nft_rbtree_elem *new)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
+ u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
int d;
parent = NULL;
@@ -95,13 +90,18 @@ static int __nft_rbtree_insert(const struct nft_set *set,
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, &new->key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext),
+ nft_set_ext_key(&new->ext),
+ set->klen);
if (d < 0)
p = &parent->rb_left;
else if (d > 0)
p = &parent->rb_right;
- else
- return -EEXIST;
+ else {
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ return -EEXIST;
+ p = &parent->rb_left;
+ }
}
rb_link_node(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
@@ -111,31 +111,13 @@ static int __nft_rbtree_insert(const struct nft_set *set,
static int nft_rbtree_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_rbtree_elem *rbe;
- unsigned int size;
+ struct nft_rbtree_elem *rbe = elem->priv;
int err;
- size = sizeof(*rbe);
- if (set->flags & NFT_SET_MAP &&
- !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
- size += sizeof(rbe->data[0]);
-
- rbe = kzalloc(size, GFP_KERNEL);
- if (rbe == NULL)
- return -ENOMEM;
-
- rbe->flags = elem->flags;
- nft_data_copy(&rbe->key, &elem->key);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(rbe->data, &elem->data);
-
spin_lock_bh(&nft_rbtree_lock);
err = __nft_rbtree_insert(set, rbe);
- if (err < 0)
- kfree(rbe);
-
spin_unlock_bh(&nft_rbtree_lock);
+
return err;
}
@@ -143,42 +125,49 @@ static void nft_rbtree_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe = elem->cookie;
+ struct nft_rbtree_elem *rbe = elem->priv;
spin_lock_bh(&nft_rbtree_lock);
rb_erase(&rbe->node, &priv->root);
spin_unlock_bh(&nft_rbtree_lock);
- kfree(rbe);
}
-static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void nft_rbtree_activate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ nft_set_elem_change_active(set, &rbe->ext);
+}
+
+static void *nft_rbtree_deactivate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
- spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
+ set->klen);
if (d < 0)
parent = parent->rb_left;
else if (d > 0)
parent = parent->rb_right;
else {
- elem->cookie = rbe;
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(&elem->data, rbe->data);
- elem->flags = rbe->flags;
- spin_unlock_bh(&nft_rbtree_lock);
- return 0;
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ nft_set_elem_change_active(set, &rbe->ext);
+ return rbe;
}
}
- spin_unlock_bh(&nft_rbtree_lock);
- return -ENOENT;
+ return NULL;
}
static void nft_rbtree_walk(const struct nft_ctx *ctx,
@@ -186,21 +175,21 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set_iter *iter)
{
const struct nft_rbtree *priv = nft_set_priv(set);
- const struct nft_rbtree_elem *rbe;
+ struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
spin_lock_bh(&nft_rbtree_lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
if (iter->count < iter->skip)
goto cont;
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ goto cont;
- rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_data_copy(&elem.key, &rbe->key);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(&elem.data, rbe->data);
- elem.flags = rbe->flags;
+ elem.priv = rbe;
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0) {
@@ -237,7 +226,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_rbtree_elem_destroy(set, rbe);
+ nft_set_elem_destroy(set, rbe);
}
}
@@ -247,9 +236,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
unsigned int nsize;
nsize = sizeof(struct nft_rbtree_elem);
- if (features & NFT_SET_MAP)
- nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
-
if (desc->size)
est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
else
@@ -262,12 +248,14 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.privsize = nft_rbtree_privsize,
+ .elemsize = offsetof(struct nft_rbtree_elem, ext),
.estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
- .get = nft_rbtree_get,
+ .deactivate = nft_rbtree_deactivate,
+ .activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.features = NFT_SET_INTERVAL | NFT_SET_MAP,
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index d7e9e93a4e90..03f7bf40ae75 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -44,25 +44,28 @@ int nft_redir_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_redir *priv = nft_expr_priv(expr);
+ unsigned int plen;
int err;
err = nft_redir_validate(ctx, expr, NULL);
if (err < 0)
return err;
+ plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
- ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN]));
+ nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
- err = nft_validate_input_register(priv->sreg_proto_min);
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
priv->sreg_proto_max =
- ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX]));
+ nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]);
- err = nft_validate_input_register(priv->sreg_proto_max);
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
@@ -85,11 +88,11 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_redir *priv = nft_expr_priv(expr);
if (priv->sreg_proto_min) {
- if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN,
- htonl(priv->sreg_proto_min)))
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN,
+ priv->sreg_proto_min))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX,
- htonl(priv->sreg_proto_max)))
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX,
+ priv->sreg_proto_max))
goto nla_put_failure;
}
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 7b5f9d58680a..62cabee42fbe 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -18,7 +18,7 @@
#include <net/netfilter/ipv6/nf_reject.h>
static void nft_reject_inet_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
@@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
case NFPROTO_IPV4:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code);
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
break;
case NFT_REJECT_TCP_RST:
nf_send_reset(pkt->skb, pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
nf_send_unreach(pkt->skb,
- nft_reject_icmp_code(priv->icmp_code));
+ nft_reject_icmp_code(priv->icmp_code),
+ pkt->ops->hooknum);
break;
}
break;
@@ -56,7 +58,8 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
}
break;
}
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+
+ regs->verdict.code = NF_DROP;
}
static int nft_reject_inet_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 50e1e5aaf4ce..cca96cec1b68 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t {
static bool tproxy_sk_is_transparent(struct sock *sk)
{
- if (sk->sk_state != TCP_TIME_WAIT) {
- if (inet_sk(sk)->transparent)
- return true;
- sock_put(sk);
- } else {
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
if (inet_twsk(sk)->tw_transparent)
return true;
- inet_twsk_put(inet_twsk(sk));
+ break;
+ case TCP_NEW_SYN_RECV:
+ if (inet_rsk(inet_reqsk(sk))->no_srccheck)
+ return true;
+ break;
+ default:
+ if (inet_sk(sk)->transparent)
+ return true;
}
+
+ sock_gen_put(sk);
return false;
}
@@ -266,7 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
@@ -431,7 +437,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7198d660b4de..a1d126f29463 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -39,7 +39,7 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_cgroup_info *info = par->matchinfo;
- if (skb->sk == NULL)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
return (info->id == skb->sk->sk_classid) ^ info->invert;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index f440f57a452f..1caaccbc306c 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -25,16 +25,15 @@ MODULE_ALIAS("ip6t_physdev");
static bool
physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct xt_physdev_info *info = par->matchinfo;
+ const struct net_device *physdev;
unsigned long ret;
const char *indev, *outdev;
- const struct nf_bridge_info *nf_bridge;
/* Not a bridged IP packet or no info available yet:
* LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
* the destination device will be a bridge. */
- if (!(nf_bridge = skb->nf_bridge)) {
+ if (!skb->nf_bridge) {
/* Return MATCH if the invert flags of the used options are on */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
!(info->invert & XT_PHYSDEV_OP_BRIDGED))
@@ -54,31 +53,41 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true;
}
+ physdev = nf_bridge_get_physoutdev(skb);
+ outdev = physdev ? physdev->name : NULL;
+
/* This only makes sense in the FORWARD and POSTROUTING chains */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
- (!!(nf_bridge->mask & BRNF_BRIDGED) ^
- !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+ (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
return false;
+ physdev = nf_bridge_get_physindev(skb);
+ indev = physdev ? physdev->name : NULL;
+
if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
- (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+ (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
(info->bitmask & XT_PHYSDEV_OP_ISOUT &&
- (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
+ (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
return false;
if (!(info->bitmask & XT_PHYSDEV_OP_IN))
goto match_outdev;
- indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
- ret = ifname_compare_aligned(indev, info->physindev, info->in_mask);
- if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
- return false;
+ if (indev) {
+ ret = ifname_compare_aligned(indev, info->physindev,
+ info->in_mask);
+
+ if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+ return false;
+ }
match_outdev:
if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
return true;
- outdev = nf_bridge->physoutdev ?
- nf_bridge->physoutdev->name : nulldevname;
+
+ if (!outdev)
+ return false;
+
ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 0d47afea9682..89045982ec94 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -193,7 +193,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter0(opt.ext.packets, &info->packets))
- return 0;
+ return false;
return match_counter0(opt.ext.bytes, &info->bytes);
}
@@ -239,7 +239,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter(opt.ext.packets, &info->packets))
- return 0;
+ return false;
return match_counter(opt.ext.bytes, &info->bytes);
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 13332dbf291d..e092cb046326 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -129,13 +129,24 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol,
return NULL;
}
-static bool
-socket_match(const struct sk_buff *skb, struct xt_action_param *par,
- const struct xt_socket_mtinfo1 *info)
+static bool xt_socket_sk_is_transparent(struct sock *sk)
+{
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
+ return inet_twsk(sk)->tw_transparent;
+
+ case TCP_NEW_SYN_RECV:
+ return inet_rsk(inet_reqsk(sk))->no_srccheck;
+
+ default:
+ return inet_sk(sk)->transparent;
+ }
+}
+
+static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
+ const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
u8 uninitialized_var(protocol);
@@ -145,10 +156,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
#endif
if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
protocol = iph->protocol;
saddr = iph->saddr;
@@ -158,16 +171,17 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
- &sport, &dport))
- return false;
+ &sport, &dport))
+ return NULL;
} else {
- return false;
+ return NULL;
}
#ifdef XT_SOCKET_HAVE_CONNTRACK
- /* Do the lookup with the original socket address in case this is a
- * reply packet of an established SNAT-ted connection. */
-
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct) &&
((iph->protocol != IPPROTO_ICMP &&
@@ -183,10 +197,18 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
}
#endif
+ return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ const struct xt_socket_mtinfo1 *info)
+{
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v4(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -195,16 +217,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* unless XT_SOCKET_NOWILDCARD is set
*/
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
- sk->sk_state != TCP_TIME_WAIT &&
+ sk_fullsock(sk) &&
inet_sk(sk)->inet_rcv_saddr == 0);
/* Ignore non-transparent sockets,
- if XT_SOCKET_TRANSPARENT is used */
+ * if XT_SOCKET_TRANSPARENT is used
+ */
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = ((sk->sk_state != TCP_TIME_WAIT &&
- inet_sk(sk)->transparent) ||
- (sk->sk_state == TCP_TIME_WAIT &&
- inet_twsk(sk)->tw_transparent));
+ transparent = xt_socket_sk_is_transparent(sk);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -213,12 +233,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
sk = NULL;
}
- pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
- protocol, &saddr, ntohs(sport),
- &daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
static bool
@@ -315,28 +330,26 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
return NULL;
}
-static bool
-socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
+ const struct net_device *indev)
{
- struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
- const struct in6_addr *daddr = NULL, *saddr = NULL;
__be16 uninitialized_var(dport), uninitialized_var(sport);
- int thoff = 0, uninitialized_var(tproto);
- const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ const struct in6_addr *daddr = NULL, *saddr = NULL;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
pr_debug("unable to find transport header in IPv6 packet, dropping\n");
- return NF_DROP;
+ return NULL;
}
if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
- hp = skb_header_pointer(skb, thoff,
- sizeof(_hdr), &_hdr);
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
saddr = &iph->saddr;
sport = hp->source;
@@ -344,17 +357,27 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
dport = hp->dest;
} else if (tproto == IPPROTO_ICMPV6) {
+ struct ipv6hdr ipv6_var;
+
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport, &ipv6_var))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
+ return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v6(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -363,16 +386,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
* unless XT_SOCKET_NOWILDCARD is set
*/
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
- sk->sk_state != TCP_TIME_WAIT &&
+ sk_fullsock(sk) &&
ipv6_addr_any(&sk->sk_v6_rcv_saddr));
/* Ignore non-transparent sockets,
- if XT_SOCKET_TRANSPARENT is used */
+ * if XT_SOCKET_TRANSPARENT is used
+ */
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = ((sk->sk_state != TCP_TIME_WAIT &&
- inet_sk(sk)->transparent) ||
- (sk->sk_state == TCP_TIME_WAIT &&
- inet_twsk(sk)->tw_transparent));
+ transparent = xt_socket_sk_is_transparent(sk);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -381,13 +402,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
sk = NULL;
}
- pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
- "(orig %pI6:%hu) sock %p\n",
- tproto, saddr, ntohs(sport),
- daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
#endif
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 5699adb97652..0bc3460319c8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -26,13 +26,12 @@ static bool
string_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_string_info *conf = par->matchinfo;
- struct ts_state state;
bool invert;
invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
- conf->to_offset, conf->config, &state)
+ conf->to_offset, conf->config)
!= UINT_MAX) ^ invert;
}