aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 09:00:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 09:00:47 -0700
commit6c373ca89399c5a3f7ef210ad8f63dc3437da345 (patch)
tree74d1ec65087df1da1021b43ac51acc1ee8601809 /net/netfilter
parentMerge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm (diff)
parentMerge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue (diff)
downloadlinux-dev-6c373ca89399c5a3f7ef210ad8f63dc3437da345.tar.xz
linux-dev-6c373ca89399c5a3f7ef210ad8f63dc3437da345.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Add BQL support to via-rhine, from Tino Reichardt. 2) Integrate SWITCHDEV layer support into the DSA layer, so DSA drivers can support hw switch offloading. From Florian Fainelli. 3) Allow 'ip address' commands to initiate multicast group join/leave, from Madhu Challa. 4) Many ipv4 FIB lookup optimizations from Alexander Duyck. 5) Support EBPF in cls_bpf classifier and act_bpf action, from Daniel Borkmann. 6) Remove the ugly compat support in ARP for ugly layers like ax25, rose, etc. And use this to clean up the neigh layer, then use it to implement MPLS support. All from Eric Biederman. 7) Support L3 forwarding offloading in switches, from Scott Feldman. 8) Collapse the LOCAL and MAIN ipv4 FIB tables when possible, to speed up route lookups even further. From Alexander Duyck. 9) Many improvements and bug fixes to the rhashtable implementation, from Herbert Xu and Thomas Graf. In particular, in the case where an rhashtable user bulk adds a large number of items into an empty table, we expand the table much more sanely. 10) Don't make the tcp_metrics hash table per-namespace, from Eric Biederman. 11) Extend EBPF to access SKB fields, from Alexei Starovoitov. 12) Split out new connection request sockets so that they can be established in the main hash table. Much less false sharing since hash lookups go direct to the request sockets instead of having to go first to the listener then to the request socks hashed underneath. From Eric Dumazet. 13) Add async I/O support for crypto AF_ALG sockets, from Tadeusz Struk. 14) Support stable privacy address generation for RFC7217 in IPV6. From Hannes Frederic Sowa. 15) Hash network namespace into IP frag IDs, also from Hannes Frederic Sowa. 16) Convert PTP get/set methods to use 64-bit time, from Richard Cochran. 
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1816 commits) fm10k: Bump driver version to 0.15.2 fm10k: corrected VF multicast update fm10k: mbx_update_max_size does not drop all oversized messages fm10k: reset head instead of calling update_max_size fm10k: renamed mbx_tx_dropped to mbx_tx_oversized fm10k: update xcast mode before synchronizing multicast addresses fm10k: start service timer on probe fm10k: fix function header comment fm10k: comment next_vf_mbx flow fm10k: don't handle mailbox events in iov_event path and always process mailbox fm10k: use separate workqueue for fm10k driver fm10k: Set PF queues to unlimited bandwidth during virtualization fm10k: expose tx_timeout_count as an ethtool stat fm10k: only increment tx_timeout_count in Tx hang path fm10k: remove extraneous "Reset interface" message fm10k: separate PF only stats so that VF does not display them fm10k: use hw->mac.max_queues for stats fm10k: only show actual queues, not the maximum in hardware fm10k: allow creation of VLAN on default vid fm10k: fix unused warnings ...
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig23
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/core.c31
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c32
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c101
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c182
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c102
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c23
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c15
-rw-r--r--net/netfilter/nf_conntrack_acct.c8
-rw-r--r--net/netfilter/nf_conntrack_amanda.c10
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_internals.h11
-rw-r--r--net/netfilter/nf_log_common.c7
-rw-r--r--net/netfilter/nf_queue.c76
-rw-r--r--net/netfilter/nf_tables_api.c636
-rw-r--r--net/netfilter/nf_tables_core.c161
-rw-r--r--net/netfilter/nfnetlink_log.c35
-rw-r--r--net/netfilter/nfnetlink_queue_core.c69
-rw-r--r--net/netfilter/nft_bitwise.c37
-rw-r--r--net/netfilter/nft_byteorder.c40
-rw-r--r--net/netfilter/nft_cmp.c44
-rw-r--r--net/netfilter/nft_compat.c41
-rw-r--r--net/netfilter/nft_counter.c3
-rw-r--r--net/netfilter/nft_ct.c118
-rw-r--r--net/netfilter/nft_dynset.c265
-rw-r--r--net/netfilter/nft_expr_template.c94
-rw-r--r--net/netfilter/nft_exthdr.c23
-rw-r--r--net/netfilter/nft_hash.c327
-rw-r--r--net/netfilter/nft_immediate.c18
-rw-r--r--net/netfilter/nft_limit.c5
-rw-r--r--net/netfilter/nft_log.c4
-rw-r--r--net/netfilter/nft_lookup.c35
-rw-r--r--net/netfilter/nft_meta.c116
-rw-r--r--net/netfilter/nft_nat.c71
-rw-r--r--net/netfilter/nft_payload.c24
-rw-r--r--net/netfilter/nft_queue.c4
-rw-r--r--net/netfilter/nft_rbtree.c132
-rw-r--r--net/netfilter/nft_redir.c19
-rw-r--r--net/netfilter/nft_reject_inet.c11
-rw-r--r--net/netfilter/xt_TPROXY.c22
-rw-r--r--net/netfilter/xt_cgroup.c2
-rw-r--r--net/netfilter/xt_physdev.c35
-rw-r--r--net/netfilter/xt_set.c4
-rw-r--r--net/netfilter/xt_socket.c129
-rw-r--r--net/netfilter/xt_string.c3
46 files changed, 1987 insertions, 1167 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index b02660fa9eb0..f70e34a68f70 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -438,8 +438,10 @@ config NF_TABLES
To compile it as a module, choose M here.
+if NF_TABLES
+
config NF_TABLES_INET
- depends on NF_TABLES && IPV6
+ depends on IPV6
select NF_TABLES_IPV4
select NF_TABLES_IPV6
tristate "Netfilter nf_tables mixed IPv4/IPv6 tables support"
@@ -447,21 +449,18 @@ config NF_TABLES_INET
This option enables support for a mixed IPv4/IPv6 "inet" table.
config NFT_EXTHDR
- depends on NF_TABLES
tristate "Netfilter nf_tables IPv6 exthdr module"
help
This option adds the "exthdr" expression that you can use to match
IPv6 extension headers.
config NFT_META
- depends on NF_TABLES
tristate "Netfilter nf_tables meta module"
help
This option adds the "meta" expression that you can use to match and
to set packet metainformation such as the packet mark.
config NFT_CT
- depends on NF_TABLES
depends on NF_CONNTRACK
tristate "Netfilter nf_tables conntrack module"
help
@@ -469,42 +468,36 @@ config NFT_CT
connection tracking information such as the flow state.
config NFT_RBTREE
- depends on NF_TABLES
tristate "Netfilter nf_tables rbtree set module"
help
This option adds the "rbtree" set type (Red Black tree) that is used
to build interval-based sets.
config NFT_HASH
- depends on NF_TABLES
tristate "Netfilter nf_tables hash set module"
help
This option adds the "hash" set type that is used to build one-way
mappings between matchings and actions.
config NFT_COUNTER
- depends on NF_TABLES
tristate "Netfilter nf_tables counter module"
help
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
config NFT_LOG
- depends on NF_TABLES
tristate "Netfilter nf_tables log module"
help
This option adds the "log" expression that you can use to log
packets matching some criteria.
config NFT_LIMIT
- depends on NF_TABLES
tristate "Netfilter nf_tables limit module"
help
This option adds the "limit" expression that you can use to
ratelimit rule matchings.
config NFT_MASQ
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables masquerade support"
@@ -513,7 +506,6 @@ config NFT_MASQ
to perform NAT in the masquerade flavour.
config NFT_REDIR
- depends on NF_TABLES
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables redirect support"
@@ -522,7 +514,6 @@ config NFT_REDIR
to perform NAT in the redirect flavour.
config NFT_NAT
- depends on NF_TABLES
depends on NF_CONNTRACK
select NF_NAT
tristate "Netfilter nf_tables nat module"
@@ -531,8 +522,6 @@ config NFT_NAT
typical Network Address Translation (NAT) packet transformations.
config NFT_QUEUE
- depends on NF_TABLES
- depends on NETFILTER_XTABLES
depends on NETFILTER_NETLINK_QUEUE
tristate "Netfilter nf_tables queue module"
help
@@ -540,7 +529,6 @@ config NFT_QUEUE
infrastructure (also known as NFQUEUE) from nftables.
config NFT_REJECT
- depends on NF_TABLES
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
help
@@ -554,7 +542,6 @@ config NFT_REJECT_INET
tristate
config NFT_COMPAT
- depends on NF_TABLES
depends on NETFILTER_XTABLES
tristate "Netfilter x_tables over nf_tables module"
help
@@ -562,6 +549,8 @@ config NFT_COMPAT
x_tables match/target extensions over the nf_tables
framework.
+endif # NF_TABLES
+
config NETFILTER_XTABLES
tristate "Netfilter Xtables support (required for ip_tables)"
default m if NETFILTER_ADVANCED=n
@@ -951,7 +940,7 @@ comment "Xtables matches"
config NETFILTER_XT_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
- depends on NETFILTER_ADVANCED
+ default m if NETFILTER_ADVANCED=n
---help---
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 89f73a9e9874..a87d8b8ec730 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# nf_tables
nf_tables-objs += nf_tables_core.o nf_tables_api.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index fea9ef566427..e6163017c42d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -120,12 +120,8 @@ EXPORT_SYMBOL(nf_unregister_hooks);
unsigned int nf_iterate(struct list_head *head,
struct sk_buff *skb,
- unsigned int hook,
- const struct net_device *indev,
- const struct net_device *outdev,
- struct nf_hook_ops **elemp,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
+ struct nf_hook_state *state,
+ struct nf_hook_ops **elemp)
{
unsigned int verdict;
@@ -134,19 +130,19 @@ unsigned int nf_iterate(struct list_head *head,
* function because of risk of continuing from deleted element.
*/
list_for_each_entry_continue_rcu((*elemp), head, list) {
- if (hook_thresh > (*elemp)->priority)
+ if (state->thresh > (*elemp)->priority)
continue;
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
+ verdict = (*elemp)->hook(*elemp, skb, state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
> NF_MAX_VERDICT)) {
NFDEBUG("Evil return from %p(%u).\n",
- (*elemp)->hook, hook);
+ (*elemp)->hook, state->hook);
continue;
}
#endif
@@ -161,11 +157,7 @@ repeat:
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- int hook_thresh)
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
struct nf_hook_ops *elem;
unsigned int verdict;
@@ -174,10 +166,11 @@ int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
- elem = list_entry_rcu(&nf_hooks[pf][hook], struct nf_hook_ops, list);
+ elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
+ struct nf_hook_ops, list);
next_hook:
- verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
- outdev, &elem, okfn, hook_thresh);
+ verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
+ &elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -186,8 +179,8 @@ next_hook:
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
- int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
- verdict >> NF_VERDICT_QBITS);
+ int err = nf_queue(skb, elem, state,
+ verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 758b002130d9..380ef5148ea1 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -19,6 +19,7 @@
#include <net/netlink.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
@@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
#include "ip_set_hash_gen.h"
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static const char *get_physindev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physindev(skb);
+
+ return dev ? dev->name : NULL;
+}
+
+static const char *get_phyoutdev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physoutdev(skb);
+
+ return dev ? dev->name : NULL;
+}
+#endif
+
static int
hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
e.ip &= ip_set_netmask(e.cidr);
#define IFACE(dir) (par->dir ? par->dir->name : NULL)
-#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL)
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
- if (!nf_bridge)
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
e.physdev = 1;
#else
e.iface = NULL;
@@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
- if (!nf_bridge)
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+
e.physdev = 1;
#else
e.iface = NULL;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b87ca32efa0b..5d2b806a862e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.inbytes += skb->len;
+ s->cnt.inpkts++;
+ s->cnt.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
struct ip_vs_service *svc;
s = this_cpu_ptr(dest->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_lock();
svc = rcu_dereference(dest->svc);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
rcu_read_unlock();
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
- s->ustats.outbytes += skb->len;
+ s->cnt.outpkts++;
+ s->cnt.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
}
}
@@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(svc->stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
- s->ustats.conns++;
+ u64_stats_update_begin(&s->syncp);
+ s->cnt.conns++;
+ u64_stats_update_end(&s->syncp);
}
@@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
}
}
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+ int conn_reuse_mode)
+{
+ /* Controlled (FTP DATA or persistence)? */
+ if (cp->control)
+ return false;
+
+ switch (cp->protocol) {
+ case IPPROTO_TCP:
+ return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+ ((conn_reuse_mode & 2) &&
+ (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+ (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+ case IPPROTO_SCTP:
+ return cp->state == IP_VS_SCTP_S_CLOSED;
+ default:
+ return false;
+ }
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
@@ -1246,8 +1272,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
*/
static unsigned int
ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET);
}
@@ -1258,8 +1283,7 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET);
}
@@ -1273,8 +1297,7 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
@@ -1285,8 +1308,7 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_out(ops->hooknum, skb, AF_INET6);
}
@@ -1585,6 +1607,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_conn *cp;
int ret, pkts;
struct netns_ipvs *ipvs;
+ int conn_reuse_mode;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1653,10 +1676,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
- if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
- is_new_conn(skb, &iph)) {
- ip_vs_conn_expire_now(cp);
+ conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+ if (conn_reuse_mode && !iph.fragoffs &&
+ is_new_conn(skb, &iph) && cp &&
+ ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) ||
+ unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
cp = NULL;
}
@@ -1738,9 +1765,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
static unsigned int
ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET);
}
@@ -1751,8 +1776,7 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET);
}
@@ -1765,9 +1789,7 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
@@ -1778,8 +1800,7 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
return ip_vs_in(ops->hooknum, skb, AF_INET6);
}
@@ -1798,8 +1819,7 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
*/
static unsigned int
ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
int r;
struct net *net;
@@ -1820,8 +1840,7 @@ ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ const struct nf_hook_state *state)
{
int r;
struct net *net;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ed99448671c3..49532672f66d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net)
}
static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
-#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
spin_lock_bh(&src->lock);
@@ -747,13 +747,28 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
}
static void
+ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
+{
+ dst->conns = (u32)src->conns;
+ dst->inpkts = (u32)src->inpkts;
+ dst->outpkts = (u32)src->outpkts;
+ dst->inbytes = src->inbytes;
+ dst->outbytes = src->outbytes;
+ dst->cps = (u32)src->cps;
+ dst->inpps = (u32)src->inpps;
+ dst->outpps = (u32)src->outpps;
+ dst->inbps = (u32)src->inbps;
+ dst->outbps = (u32)src->outbps;
+}
+
+static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
/* get current counters as zero point, rates are zeroed */
-#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
IP_VS_ZERO_STATS_COUNTER(conns);
IP_VS_ZERO_STATS_COUNTER(inpkts);
@@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_reuse_mode",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = {
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
- struct ip_vs_stats_user show;
+ struct ip_vs_kstats show;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
@@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
" Conns Packets Packets Bytes Bytes\n");
ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
- show.inpkts, show.outpkts,
- (unsigned long long) show.inbytes,
- (unsigned long long) show.outbytes);
-
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)show.conns,
+ (unsigned long long)show.inpkts,
+ (unsigned long long)show.outpkts,
+ (unsigned long long)show.inbytes,
+ (unsigned long long)show.outbytes);
+
+/* 01234567 01234567 01234567 0123456701234567 0123456701234567*/
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, "%8X %8X %8X %16X %16X\n",
- show.cps, show.inpps, show.outpps,
- show.inbps, show.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
+ (unsigned long long)show.cps,
+ (unsigned long long)show.inpps,
+ (unsigned long long)show.outpps,
+ (unsigned long long)show.inbps,
+ (unsigned long long)show.outbps);
return 0;
}
@@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
- struct ip_vs_stats_user rates;
+ struct ip_vs_kstats kstats;
int i;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
do {
start = u64_stats_fetch_begin_irq(&u->syncp);
- inbytes = u->ustats.inbytes;
- outbytes = u->ustats.outbytes;
+ conns = u->cnt.conns;
+ inpkts = u->cnt.inpkts;
+ outpkts = u->cnt.outpkts;
+ inbytes = u->cnt.inbytes;
+ outbytes = u->cnt.outbytes;
} while (u64_stats_fetch_retry_irq(&u->syncp, start));
- seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
- i, u->ustats.conns, u->ustats.inpkts,
- u->ustats.outpkts, (__u64)inbytes,
- (__u64)outbytes);
+ seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
+ i, (u64)conns, (u64)inpkts,
+ (u64)outpkts, (u64)inbytes,
+ (u64)outbytes);
}
- spin_lock_bh(&tot_stats->lock);
-
- seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
- tot_stats->ustats.conns, tot_stats->ustats.inpkts,
- tot_stats->ustats.outpkts,
- (unsigned long long) tot_stats->ustats.inbytes,
- (unsigned long long) tot_stats->ustats.outbytes);
-
- ip_vs_read_estimator(&rates, tot_stats);
+ ip_vs_copy_stats(&kstats, tot_stats);
- spin_unlock_bh(&tot_stats->lock);
+ seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n",
+ (unsigned long long)kstats.conns,
+ (unsigned long long)kstats.inpkts,
+ (unsigned long long)kstats.outpkts,
+ (unsigned long long)kstats.inbytes,
+ (unsigned long long)kstats.outbytes);
-/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+/* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
- " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
- seq_printf(seq, " %8X %8X %8X %16X %16X\n",
- rates.cps,
- rates.inpps,
- rates.outpps,
- rates.inbps,
- rates.outbps);
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
+ seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n",
+ kstats.cps,
+ kstats.inpps,
+ kstats.outpps,
+ kstats.inbps,
+ kstats.outbps);
return 0;
}
@@ -2400,6 +2426,7 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
+ struct ip_vs_kstats kstats;
sched = rcu_dereference_protected(src->scheduler, 1);
dst->protocol = src->protocol;
@@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
dst->num_dests = src->num_dests;
- ip_vs_copy_stats(&dst->stats, &src->stats);
+ ip_vs_copy_stats(&kstats, &src->stats);
+ ip_vs_export_stats_user(&dst->stats, &kstats);
}
static inline int
@@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
int count = 0;
struct ip_vs_dest *dest;
struct ip_vs_dest_entry entry;
+ struct ip_vs_kstats kstats;
memset(&entry, 0, sizeof(entry));
list_for_each_entry(dest, &svc->destinations, n_list) {
@@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
entry.activeconns = atomic_read(&dest->activeconns);
entry.inactconns = atomic_read(&dest->inactconns);
entry.persistconns = atomic_read(&dest->persistconns);
- ip_vs_copy_stats(&entry.stats, &dest->stats);
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ ip_vs_export_stats_user(&entry.stats, &kstats);
if (copy_to_user(&uptr->entrytable[count],
&entry, sizeof(entry))) {
ret = -EFAULT;
@@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
};
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
- struct ip_vs_stats *stats)
+ struct ip_vs_kstats *kstats)
{
- struct ip_vs_stats_user ustats;
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
if (!nl_stats)
return -EMSGSIZE;
- ip_vs_copy_stats(&ustats, stats);
-
- if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
- nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
- nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
+ if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
+ nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
+ goto nla_put_failure;
+ nla_nest_end(skb, nl_stats);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nl_stats);
+ return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
+ struct ip_vs_kstats *kstats)
+{
+ struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
+ nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
goto nla_put_failure;
nla_nest_end(skb, nl_stats);
@@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
+ struct ip_vs_kstats kstats;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+ ip_vs_copy_stats(&kstats, &svc->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_service);
@@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
struct nlattr *nl_dest;
+ struct ip_vs_kstats kstats;
nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
if (!nl_dest)
@@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
atomic_read(&dest->persistconns)) ||
nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
goto nla_put_failure;
- if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+ ip_vs_copy_stats(&kstats, &dest->stats);
+ if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+ goto nla_put_failure;
+ if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
goto nla_put_failure;
nla_nest_end(skb, nl_dest);
@@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
ipvs->sysctl_pmtu_disc = 1;
tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
tbl[idx++].data = &ipvs->sysctl_backup_only;
+ ipvs->sysctl_conn_reuse_mode = 1;
+ tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 1425e9a924c4..ef0eb0a8d552 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -45,17 +45,19 @@
NOTES.
- * The stored value for average bps is scaled by 2^5, so that maximal
- rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+ * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
- * A lot code is taken from net/sched/estimator.c
+ * Netlink users can see 64-bit values but sockopt users are restricted
+ to 32-bit values for conns, packets, bps, cps and pps.
+
+ * A lot of code is taken from net/core/gen_estimator.c
*/
/*
* Make a summary from each cpu
*/
-static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
struct ip_vs_cpu_stats __percpu *stats)
{
int i;
@@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
unsigned int start;
- __u64 inbytes, outbytes;
+ u64 conns, inpkts, outpkts, inbytes, outbytes;
+
if (add) {
- sum->conns += s->ustats.conns;
- sum->inpkts += s->ustats.inpkts;
- sum->outpkts += s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- inbytes = s->ustats.inbytes;
- outbytes = s->ustats.outbytes;
+ conns = s->cnt.conns;
+ inpkts = s->cnt.inpkts;
+ outpkts = s->cnt.outpkts;
+ inbytes = s->cnt.inbytes;
+ outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
+ sum->conns += conns;
+ sum->inpkts += inpkts;
+ sum->outpkts += outpkts;
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
add = true;
- sum->conns = s->ustats.conns;
- sum->inpkts = s->ustats.inpkts;
- sum->outpkts = s->ustats.outpkts;
do {
start = u64_stats_fetch_begin(&s->syncp);
- sum->inbytes = s->ustats.inbytes;
- sum->outbytes = s->ustats.outbytes;
+ sum->conns = s->cnt.conns;
+ sum->inpkts = s->cnt.inpkts;
+ sum->outpkts = s->cnt.outpkts;
+ sum->inbytes = s->cnt.inbytes;
+ sum->outbytes = s->cnt.outbytes;
} while (u64_stats_fetch_retry(&s->syncp, start));
}
}
@@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg)
{
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
- u32 n_conns;
- u32 n_inpkts, n_outpkts;
- u64 n_inbytes, n_outbytes;
- u32 rate;
+ u64 rate;
struct net *net = (struct net *)arg;
struct netns_ipvs *ipvs;
@@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg)
s = container_of(e, struct ip_vs_stats, est);
spin_lock(&s->lock);
- ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
- n_conns = s->ustats.conns;
- n_inpkts = s->ustats.inpkts;
- n_outpkts = s->ustats.outpkts;
- n_inbytes = s->ustats.inbytes;
- n_outbytes = s->ustats.outbytes;
+ ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
/* scaled by 2^10, but divided 2 seconds */
- rate = (n_conns - e->last_conns) << 9;
- e->last_conns = n_conns;
- e->cps += ((long)rate - (long)e->cps) >> 2;
-
- rate = (n_inpkts - e->last_inpkts) << 9;
- e->last_inpkts = n_inpkts;
- e->inpps += ((long)rate - (long)e->inpps) >> 2;
-
- rate = (n_outpkts - e->last_outpkts) << 9;
- e->last_outpkts = n_outpkts;
- e->outpps += ((long)rate - (long)e->outpps) >> 2;
-
- rate = (n_inbytes - e->last_inbytes) << 4;
- e->last_inbytes = n_inbytes;
- e->inbps += ((long)rate - (long)e->inbps) >> 2;
-
- rate = (n_outbytes - e->last_outbytes) << 4;
- e->last_outbytes = n_outbytes;
- e->outbps += ((long)rate - (long)e->outbps) >> 2;
+ rate = (s->kstats.conns - e->last_conns) << 9;
+ e->last_conns = s->kstats.conns;
+ e->cps += ((s64)rate - (s64)e->cps) >> 2;
+
+ rate = (s->kstats.inpkts - e->last_inpkts) << 9;
+ e->last_inpkts = s->kstats.inpkts;
+ e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
+
+ rate = (s->kstats.outpkts - e->last_outpkts) << 9;
+ e->last_outpkts = s->kstats.outpkts;
+ e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
+
+ /* scaled by 2^5, but divided 2 seconds */
+ rate = (s->kstats.inbytes - e->last_inbytes) << 4;
+ e->last_inbytes = s->kstats.inbytes;
+ e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
+
+ rate = (s->kstats.outbytes - e->last_outbytes) << 4;
+ e->last_outbytes = s->kstats.outbytes;
+ e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
@@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
- struct ip_vs_stats_user *u = &stats->ustats;
+ struct ip_vs_kstats *k = &stats->kstats;
/* reset counters, caller must hold the stats->lock lock */
- est->last_inbytes = u->inbytes;
- est->last_outbytes = u->outbytes;
- est->last_conns = u->conns;
- est->last_inpkts = u->inpkts;
- est->last_outpkts = u->outpkts;
+ est->last_inbytes = k->inbytes;
+ est->last_outbytes = k->outbytes;
+ est->last_conns = k->conns;
+ est->last_inpkts = k->inpkts;
+ est->last_outpkts = k->outpkts;
est->cps = 0;
est->inpps = 0;
est->outpps = 0;
@@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
}
/* Get decoded rates */
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
- struct ip_vs_stats *stats)
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
{
struct ip_vs_estimator *e = &stats->est;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d93ceeb3ef04..19b9cce6c210 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
- else
+ if (cp && ((cp->dport != dport) ||
+ !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+ if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ } else {
+ /* This is the expiration message for the
+ * connection that was already replaced, so we
+ * just ignore it.
+ */
+ __ip_vs_conn_put(cp);
+ kfree(param->pe_data);
+ return;
+ }
+ }
+ } else {
cp = ip_vs_ct_in_get(param);
+ }
if (cp) {
/* Free pe_data */
@@ -1388,9 +1405,11 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
mreq.imr_ifindex = dev->ifindex;
+ rtnl_lock();
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
+ rtnl_unlock();
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3aedbda7658a..19986ec5f21a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
struct sock *sk = skb->sk;
struct rtable *ort = skb_rtable(skb);
- if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+ if (!skb->dev && sk && sk_fullsock(sk))
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
@@ -536,8 +536,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_update_conntrack(skb, cp, 1);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
@@ -554,8 +554,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
if (!local) {
skb_forward_csum(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
- dst_output);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
@@ -924,7 +924,8 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct net *net = skb_net(skb);
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
@@ -991,7 +992,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = ttl;
- ip_select_ident(skb, NULL);
+ ip_select_ident(net, skb, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->ignore_df = 1;
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index a4b5e2a435ac..45da11afa785 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -47,9 +47,11 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
return 0;
counter = acct->counter;
- return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)atomic64_read(&counter[dir].packets),
- (unsigned long long)atomic64_read(&counter[dir].bytes));
+ seq_printf(s, "packets=%llu bytes=%llu ",
+ (unsigned long long)atomic64_read(&counter[dir].packets),
+ (unsigned long long)atomic64_read(&counter[dir].bytes));
+
+ return 0;
};
EXPORT_SYMBOL_GPL(seq_print_acct);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index b8b95f4027ca..57a26cc90c9f 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -88,7 +88,6 @@ static int amanda_help(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- struct ts_state ts;
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple *tuple;
unsigned int dataoff, start, stop, off, i;
@@ -113,23 +112,20 @@ static int amanda_help(struct sk_buff *skb,
return NF_ACCEPT;
}
- memset(&ts, 0, sizeof(ts));
start = skb_find_text(skb, dataoff, skb->len,
- search[SEARCH_CONNECT].ts, &ts);
+ search[SEARCH_CONNECT].ts);
if (start == UINT_MAX)
goto out;
start += dataoff + search[SEARCH_CONNECT].len;
- memset(&ts, 0, sizeof(ts));
stop = skb_find_text(skb, start, skb->len,
- search[SEARCH_NEWLINE].ts, &ts);
+ search[SEARCH_NEWLINE].ts);
if (stop == UINT_MAX)
goto out;
stop += start;
for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
- memset(&ts, 0, sizeof(ts));
- off = skb_find_text(skb, start, stop, search[i].ts, &ts);
+ off = skb_find_text(skb, start, stop, search[i].ts);
if (off == UINT_MAX)
continue;
off += start + search[i].len;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 91a1837acd0e..7a17070c5dab 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -561,7 +561,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
helper->expect_policy[expect->class].name);
}
- return seq_putc(s, '\n');
+ seq_putc(s, '\n');
+
+ return 0;
}
static const struct seq_operations exp_seq_ops = {
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 61a3c927e63c..ea7f36784b3d 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,16 +14,11 @@
/* core.c */
unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
- unsigned int hook, const struct net_device *indev,
- const struct net_device *outdev,
- struct nf_hook_ops **elemp,
- int (*okfn)(struct sk_buff *), int hook_thresh);
+ struct nf_hook_state *state, struct nf_hook_ops **elemp);
/* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf,
- unsigned int hook, struct net_device *indev,
- struct net_device *outdev, int (*okfn)(struct sk_buff *),
- unsigned int queuenum);
+int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
+ struct nf_hook_state *state, unsigned int queuenum);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a2233e77cf39..a5aa5967b8e1 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -17,6 +17,7 @@
#include <net/route.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
@@ -133,7 +134,7 @@ EXPORT_SYMBOL_GPL(nf_log_dump_tcp_header);
void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
{
- if (!sk || sk->sk_state == TCP_TIME_WAIT)
+ if (!sk || !sk_fullsock(sk))
return;
read_lock_bh(&sk->sk_callback_lock);
@@ -163,10 +164,10 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
const struct net_device *physindev;
const struct net_device *physoutdev;
- physindev = skb->nf_bridge->physindev;
+ physindev = nf_bridge_get_physindev(skb);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
+ physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && out != physoutdev)
nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4c8b68e5fa16..2e88032cd5ad 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/protocol.h>
@@ -47,19 +48,25 @@ EXPORT_SYMBOL(nf_unregister_queue_handler);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
+ struct nf_hook_state *state = &entry->state;
+
/* Release those devices we held, or Alexey will kill me. */
- if (entry->indev)
- dev_put(entry->indev);
- if (entry->outdev)
- dev_put(entry->outdev);
+ if (state->in)
+ dev_put(state->in);
+ if (state->out)
+ dev_put(state->out);
+ if (state->sk)
+ sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+ struct net_device *physdev;
- if (nf_bridge->physindev)
- dev_put(nf_bridge->physindev);
- if (nf_bridge->physoutdev)
- dev_put(nf_bridge->physoutdev);
+ physdev = nf_bridge_get_physindev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
+ physdev = nf_bridge_get_physoutdev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
}
#endif
/* Drop reference to owner of hook which queued us. */
@@ -70,22 +77,25 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
+ struct nf_hook_state *state = &entry->state;
+
if (!try_module_get(entry->elem->owner))
return false;
- if (entry->indev)
- dev_hold(entry->indev);
- if (entry->outdev)
- dev_hold(entry->outdev);
+ if (state->in)
+ dev_hold(state->in);
+ if (state->out)
+ dev_hold(state->out);
+ if (state->sk)
+ sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
struct net_device *physdev;
- physdev = nf_bridge->physindev;
+ physdev = nf_bridge_get_physindev(entry->skb);
if (physdev)
dev_hold(physdev);
- physdev = nf_bridge->physoutdev;
+ physdev = nf_bridge_get_physoutdev(entry->skb);
if (physdev)
dev_hold(physdev);
}
@@ -100,12 +110,9 @@ EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
* through nf_reinject().
*/
int nf_queue(struct sk_buff *skb,
- struct nf_hook_ops *elem,
- u_int8_t pf, unsigned int hook,
- struct net_device *indev,
- struct net_device *outdev,
- int (*okfn)(struct sk_buff *),
- unsigned int queuenum)
+ struct nf_hook_ops *elem,
+ struct nf_hook_state *state,
+ unsigned int queuenum)
{
int status = -ENOENT;
struct nf_queue_entry *entry = NULL;
@@ -121,7 +128,7 @@ int nf_queue(struct sk_buff *skb,
goto err_unlock;
}
- afinfo = nf_get_afinfo(pf);
+ afinfo = nf_get_afinfo(state->pf);
if (!afinfo)
goto err_unlock;
@@ -134,11 +141,7 @@ int nf_queue(struct sk_buff *skb,
*entry = (struct nf_queue_entry) {
.skb = skb,
.elem = elem,
- .pf = pf,
- .hook = hook,
- .indev = indev,
- .outdev = outdev,
- .okfn = okfn,
+ .state = *state,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -184,30 +187,29 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
}
if (verdict == NF_ACCEPT) {
- afinfo = nf_get_afinfo(entry->pf);
+ afinfo = nf_get_afinfo(entry->state.pf);
if (!afinfo || afinfo->reroute(skb, entry) < 0)
verdict = NF_DROP;
}
+ entry->state.thresh = INT_MIN;
+
if (verdict == NF_ACCEPT) {
next_hook:
- verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook],
- skb, entry->hook,
- entry->indev, entry->outdev, &elem,
- entry->okfn, INT_MIN);
+ verdict = nf_iterate(&nf_hooks[entry->state.pf][entry->state.hook],
+ skb, &entry->state, &elem);
}
switch (verdict & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_STOP:
local_bh_disable();
- entry->okfn(skb);
+ entry->state.okfn(entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, elem, entry->pf, entry->hook,
- entry->indev, entry->outdev, entry->okfn,
- verdict >> NF_VERDICT_QBITS);
+ err = nf_queue(skb, elem, &entry->state,
+ verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ac1a9528dbf2..78af83bc9c8e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -198,36 +198,31 @@ static int nft_delchain(struct nft_ctx *ctx)
static inline bool
nft_rule_is_active(struct net *net, const struct nft_rule *rule)
{
- return (rule->genmask & (1 << net->nft.gencursor)) == 0;
-}
-
-static inline int gencursor_next(struct net *net)
-{
- return net->nft.gencursor+1 == 1 ? 1 : 0;
+ return (rule->genmask & nft_genmask_cur(net)) == 0;
}
static inline int
nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
{
- return (rule->genmask & (1 << gencursor_next(net))) == 0;
+ return (rule->genmask & nft_genmask_next(net)) == 0;
}
static inline void
nft_rule_activate_next(struct net *net, struct nft_rule *rule)
{
/* Now inactive, will be active in the future */
- rule->genmask = (1 << net->nft.gencursor);
+ rule->genmask = nft_genmask_cur(net);
}
static inline void
nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
{
- rule->genmask = (1 << gencursor_next(net));
+ rule->genmask = nft_genmask_next(net);
}
static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
{
- rule->genmask &= ~(1 << gencursor_next(net));
+ rule->genmask &= ~nft_genmask_next(net);
}
static int
@@ -401,7 +396,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
}
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
- [NFTA_TABLE_NAME] = { .type = NLA_STRING },
+ [NFTA_TABLE_NAME] = { .type = NLA_STRING,
+ .len = NFT_TABLE_MAXNAMELEN - 1 },
[NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
};
@@ -686,26 +682,28 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
if (!try_module_get(afi->owner))
return -EAFNOSUPPORT;
- table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
- if (table == NULL) {
- module_put(afi->owner);
- return -ENOMEM;
- }
+ err = -ENOMEM;
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
+ if (table == NULL)
+ goto err1;
- nla_strlcpy(table->name, name, nla_len(name));
+ nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
table->flags = flags;
nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
- if (err < 0) {
- kfree(table);
- module_put(afi->owner);
- return err;
- }
+ if (err < 0)
+ goto err2;
+
list_add_tail_rcu(&table->list, &afi->tables);
return 0;
+err2:
+ kfree(table);
+err1:
+ module_put(afi->owner);
+ return err;
}
static int nft_flush_table(struct nft_ctx *ctx)
@@ -1351,6 +1349,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
rcu_assign_pointer(basechain->stats, stats);
}
+ write_pnet(&basechain->pnet, net);
basechain->type = type;
chain = &basechain->chain;
@@ -1378,7 +1377,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
INIT_LIST_HEAD(&chain->rules);
chain->handle = nf_tables_alloc_handle(table);
- chain->net = net;
chain->table = table;
nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
@@ -1547,6 +1545,23 @@ nla_put_failure:
return -1;
};
+/*
+ * Dump one expression as a nested netlink attribute of type @attr.
+ *
+ * Opens the nest, lets nf_tables_fill_expr_info() write the expression into
+ * it and closes the nest again. Returns 0 on success and -1 if the nest
+ * could not be started or the expression could not be written.
+ */
+int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
+ const struct nft_expr *expr)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, attr);
+ if (!nest)
+ goto nla_put_failure;
+ if (nf_tables_fill_expr_info(skb, expr) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ /* NOTE(review): a nest that was already opened is not cancelled here;
+ * presumably callers discard the partial message - confirm.
+ */
+ return -1;
+}
+
struct nft_expr_info {
const struct nft_expr_ops *ops;
struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
@@ -1624,6 +1639,39 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
module_put(expr->ops->type->owner);
}
+/*
+ * Parse a netlink expression attribute and build a new expression from it.
+ *
+ * @ctx: nf_tables context handed through to the parse and init helpers
+ * @nla: netlink attribute describing the expression
+ *
+ * Returns the newly allocated expression on success. On failure returns an
+ * ERR_PTR() carrying the error from nf_tables_expr_parse(), -ENOMEM if the
+ * allocation failed, or the error from nf_tables_newexpr().
+ */
+struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
+ const struct nlattr *nla)
+{
+ struct nft_expr_info info;
+ struct nft_expr *expr;
+ int err;
+
+ err = nf_tables_expr_parse(ctx, nla, &info);
+ if (err < 0)
+ goto err1;
+
+ err = -ENOMEM;
+ expr = kzalloc(info.ops->size, GFP_KERNEL);
+ if (expr == NULL)
+ goto err2;
+
+ err = nf_tables_newexpr(ctx, &info, expr);
+ if (err < 0)
+ goto err3;
+
+ return expr;
+err3:
+ /* The expression never reaches the caller, so release its memory here
+ * instead of leaking it.
+ */
+ kfree(expr);
+err2:
+ /* Drop the module reference that a successful parse left us holding
+ * (presumably taken inside nf_tables_expr_parse() - confirm).
+ */
+ module_put(info.ops->type->owner);
+err1:
+ return ERR_PTR(err);
+}
+
+/*
+ * Tear down an expression with nf_tables_expr_destroy() and then free the
+ * memory that holds it. @expr must not be used after this returns.
+ */
+void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
+{
+ nf_tables_expr_destroy(ctx, expr);
+ kfree(expr);
+}
+
/*
* Rules
*/
@@ -1705,12 +1753,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
if (list == NULL)
goto nla_put_failure;
nft_rule_for_each_expr(expr, next, rule) {
- struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
- if (elem == NULL)
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0)
goto nla_put_failure;
- if (nf_tables_fill_expr_info(skb, expr) < 0)
- goto nla_put_failure;
- nla_nest_end(skb, elem);
}
nla_nest_end(skb, list);
@@ -2161,7 +2205,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
features = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
- features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+ features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
}
bops = NULL;
@@ -2218,6 +2262,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
[NFTA_SET_POLICY] = { .type = NLA_U32 },
[NFTA_SET_DESC] = { .type = NLA_NESTED },
[NFTA_SET_ID] = { .type = NLA_U32 },
+ [NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2368,6 +2414,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
goto nla_put_failure;
}
+ if (set->timeout &&
+ nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
+ goto nla_put_failure;
+ if (set->gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ goto nla_put_failure;
+
if (set->policy != NFT_SET_POL_PERFORMANCE) {
if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
goto nla_put_failure;
@@ -2580,7 +2633,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, dtype, flags, policy;
+ u64 timeout;
+ u32 ktype, dtype, flags, policy, gc_int;
struct nft_set_desc desc;
int err;
@@ -2600,15 +2654,20 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
}
desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
- if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
+ if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;
flags = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
- NFT_SET_INTERVAL | NFT_SET_MAP))
+ NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
+ NFT_SET_MAP | NFT_SET_EVAL))
return -EINVAL;
+ /* Only one of both operations is supported */
+ if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) ==
+ (NFT_SET_MAP | NFT_SET_EVAL))
+ return -EOPNOTSUPP;
}
dtype = 0;
@@ -2625,14 +2684,26 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
- if (desc.dlen == 0 ||
- desc.dlen > FIELD_SIZEOF(struct nft_data, data))
+ if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
return -EINVAL;
} else
- desc.dlen = sizeof(struct nft_data);
+ desc.dlen = sizeof(struct nft_verdict);
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ timeout = 0;
+ if (nla[NFTA_SET_TIMEOUT] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+ }
+ gc_int = 0;
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
@@ -2692,6 +2763,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
goto err2;
INIT_LIST_HEAD(&set->bindings);
+ write_pnet(&set->pnet, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
@@ -2700,6 +2772,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
set->flags = flags;
set->size = desc.size;
set->policy = policy;
+ set->timeout = timeout;
+ set->gc_int = gc_int;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2768,12 +2842,14 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
enum nft_registers dreg;
dreg = nft_type_to_reg(set->dtype);
- return nft_validate_data_load(ctx, dreg, &elem->data,
- set->dtype == NFT_DATA_VERDICT ?
- NFT_DATA_VERDICT : NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext),
+ set->dtype == NFT_DATA_VERDICT ?
+ NFT_DATA_VERDICT : NFT_DATA_VALUE,
+ set->dlen);
}
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
@@ -2785,12 +2861,13 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
return -EBUSY;
- if (set->flags & NFT_SET_MAP) {
+ if (binding->flags & NFT_SET_MAP) {
/* If the set is already bound to the same chain all
* jumps are already validated for that chain.
*/
list_for_each_entry(i, &set->bindings, list) {
- if (i->chain == binding->chain)
+ if (binding->flags & NFT_SET_MAP &&
+ i->chain == binding->chain)
goto bind;
}
@@ -2824,6 +2901,35 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
nf_tables_set_destroy(ctx, set);
}
+/*
+ * Per-extension-type description of set element extensions, indexed by the
+ * NFT_SET_EXT_* identifiers: the alignment each extension needs and, where
+ * given, its fixed length.
+ *
+ * The KEY, DATA and EXPR entries set no .len, so it is zero-initialised;
+ * presumably their size is supplied per set/expression elsewhere - confirm.
+ */
+const struct nft_set_ext_type nft_set_ext_types[] = {
+ [NFT_SET_EXT_KEY] = {
+ .align = __alignof__(u32),
+ },
+ [NFT_SET_EXT_DATA] = {
+ .align = __alignof__(u32),
+ },
+ [NFT_SET_EXT_EXPR] = {
+ .align = __alignof__(struct nft_expr),
+ },
+ [NFT_SET_EXT_FLAGS] = {
+ .len = sizeof(u8),
+ .align = __alignof__(u8),
+ },
+ [NFT_SET_EXT_TIMEOUT] = {
+ .len = sizeof(u64),
+ .align = __alignof__(u64),
+ },
+ [NFT_SET_EXT_EXPIRATION] = {
+ .len = sizeof(unsigned long),
+ .align = __alignof__(unsigned long),
+ },
+ [NFT_SET_EXT_USERDATA] = {
+ .len = sizeof(struct nft_userdata),
+ .align = __alignof__(struct nft_userdata),
+ },
+};
+EXPORT_SYMBOL_GPL(nft_set_ext_types);
+
/*
* Set elements
*/
@@ -2832,6 +2938,9 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
[NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2870,6 +2979,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
@@ -2877,20 +2987,52 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
if (nest == NULL)
goto nla_put_failure;
- if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
- set->klen) < 0)
+ if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, nft_set_ext_key(ext),
+ NFT_DATA_VALUE, set->klen) < 0)
goto nla_put_failure;
- if (set->flags & NFT_SET_MAP &&
- !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
- nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+ nft_data_dump(skb, NFTA_SET_ELEM_DATA, nft_set_ext_data(ext),
set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
set->dlen) < 0)
goto nla_put_failure;
- if (elem->flags != 0)
- if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) &&
+ nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
+ htonl(*nft_set_ext_flags(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ cpu_to_be64(*nft_set_ext_timeout(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ unsigned long expires, now = jiffies;
+
+ expires = *nft_set_ext_expiration(ext);
+ if (time_before(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ cpu_to_be64(jiffies_to_msecs(expires))))
goto nla_put_failure;
+ }
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+ struct nft_userdata *udata;
+
+ udata = nft_set_ext_userdata(ext);
+ if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+ udata->len + 1, udata->data))
+ goto nla_put_failure;
+ }
nla_nest_end(skb, nest);
return 0;
@@ -3111,20 +3253,65 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
return trans;
}
+void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const u32 *key, const u32 *data,
+ u64 timeout, gfp_t gfp)
+{
+ struct nft_set_ext *ext;
+ void *elem;
+
+ elem = kzalloc(set->ops->elemsize + tmpl->len, gfp);
+ if (elem == NULL)
+ return NULL;
+
+ ext = nft_set_elem_ext(set, elem);
+ nft_set_ext_init(ext, tmpl);
+
+ memcpy(nft_set_ext_key(ext), key, set->klen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ memcpy(nft_set_ext_data(ext), data, set->dlen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
+ *nft_set_ext_expiration(ext) =
+ jiffies + msecs_to_jiffies(timeout);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+ *nft_set_ext_timeout(ext) = timeout;
+
+ return elem;
+}
+
+void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+{
+ struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+
+ nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
+ nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+ nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+
+ kfree(elem);
+}
+EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+
static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_data_desc d1, d2;
+ struct nft_set_ext_tmpl tmpl;
+ struct nft_set_ext *ext;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_userdata *udata;
+ struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u64 timeout;
+ u32 flags;
+ u8 ulen;
int err;
- if (set->size && set->nelems == set->size)
- return -ENFILE;
-
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -3133,38 +3320,59 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_KEY] == NULL)
return -EINVAL;
- elem.flags = 0;
+ nft_set_ext_prepare(&tmpl);
+
+ flags = 0;
if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
- elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
- if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
+ flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
+ if (flags & ~NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
+ if (!(set->flags & NFT_SET_INTERVAL) &&
+ flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
+ if (flags != 0)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
}
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
- !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
if (nla[NFTA_SET_ELEM_DATA] != NULL &&
- elem.flags & NFT_SET_ELEM_INTERVAL_END)
+ flags & NFT_SET_ELEM_INTERVAL_END)
return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_DATA] != NULL)
return -EINVAL;
}
- err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
+ timeout = 0;
+ if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+ } else if (set->flags & NFT_SET_TIMEOUT) {
+ timeout = set->timeout;
+ }
+
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
err = -EINVAL;
if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
goto err2;
- err = -EEXIST;
- if (set->ops->get(set, &elem) == 0)
- goto err2;
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len);
+ if (timeout > 0) {
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (timeout != set->timeout)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ }
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
- err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
+ err = nft_data_init(ctx, &data, sizeof(data), &d2,
+ nla[NFTA_SET_ELEM_DATA]);
if (err < 0)
goto err2;
@@ -3180,32 +3388,68 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
.chain = (struct nft_chain *)binding->chain,
};
- err = nft_validate_data_load(&bind_ctx, dreg,
- &elem.data, d2.type);
+ if (!(binding->flags & NFT_SET_MAP))
+ continue;
+
+ err = nft_validate_register_store(&bind_ctx, dreg,
+ &data,
+ d2.type, d2.len);
if (err < 0)
goto err3;
}
+
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len);
+ }
+
+ /* The full maximum length of userdata can exceed the maximum
+ * offset value (U8_MAX) for following extensions, therefore it
+ * must be the last extension added.
+ */
+ ulen = 0;
+ if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+ ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+ if (ulen > 0)
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ }
+
+ err = -ENOMEM;
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
+ timeout, GFP_KERNEL);
+ if (elem.priv == NULL)
+ goto err3;
+
+ ext = nft_set_elem_ext(set, elem.priv);
+ if (flags)
+ *nft_set_ext_flags(ext) = flags;
+ if (ulen > 0) {
+ udata = nft_set_ext_userdata(ext);
+ udata->len = ulen - 1;
+ nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
- goto err3;
+ goto err4;
+ ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
err = set->ops->insert(set, &elem);
if (err < 0)
- goto err4;
+ goto err5;
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
-err4:
+err5:
kfree(trans);
+err4:
+ kfree(elem.priv);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
- nft_data_uninit(&elem.data, d2.type);
+ nft_data_uninit(&data, d2.type);
err2:
- nft_data_uninit(&elem.key, d1.type);
+ nft_data_uninit(&elem.key.val, d1.type);
err1:
return err;
}
@@ -3241,11 +3485,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
+ return -ENFILE;
+
err = nft_add_set_elem(&ctx, set, attr);
- if (err < 0)
+ if (err < 0) {
+ atomic_dec(&set->nelems);
break;
-
- set->nelems++;
+ }
}
return err;
}
@@ -3268,7 +3516,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_KEY] == NULL)
goto err1;
- err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]);
+ err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
@@ -3276,21 +3525,26 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
goto err2;
- err = set->ops->get(set, &elem);
- if (err < 0)
- goto err2;
-
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
+ elem.priv = set->ops->deactivate(set, &elem);
+ if (elem.priv == NULL) {
+ err = -ENOENT;
+ goto err3;
+ }
+
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+
+err3:
+ kfree(trans);
err2:
- nft_data_uninit(&elem.key, desc.type);
+ nft_data_uninit(&elem.key.val, desc.type);
err1:
return err;
}
@@ -3322,11 +3576,36 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
break;
- set->nelems--;
+ set->ndeact++;
}
return err;
}
+void nft_set_gc_batch_release(struct rcu_head *rcu)
+{
+ struct nft_set_gc_batch *gcb;
+ unsigned int i;
+
+ gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
+ for (i = 0; i < gcb->head.cnt; i++)
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ kfree(gcb);
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
+
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+ gfp_t gfp)
+{
+ struct nft_set_gc_batch *gcb;
+
+ gcb = kzalloc(sizeof(*gcb), gfp);
+ if (gcb == NULL)
+ return gcb;
+ gcb->head.set = set;
+ return gcb;
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
+
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
@@ -3526,6 +3805,10 @@ static void nf_tables_commit_release(struct nft_trans *trans)
case NFT_MSG_DELSET:
nft_set_destroy(nft_trans_set(trans));
break;
+ case NFT_MSG_DELSETELEM:
+ nft_set_elem_destroy(nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv);
+ break;
}
kfree(trans);
}
@@ -3540,7 +3823,7 @@ static int nf_tables_commit(struct sk_buff *skb)
while (++net->nft.base_seq == 0);
/* A new generation has just started */
- net->nft.gencursor = gencursor_next(net);
+ net->nft.gencursor = nft_gencursor_next(net);
/* Make sure all packets have left the previous generation before
* purging old rules.
@@ -3611,24 +3894,23 @@ static int nf_tables_commit(struct sk_buff *skb)
NFT_MSG_DELSET, GFP_KERNEL);
break;
case NFT_MSG_NEWSETELEM:
- nf_tables_setelem_notify(&trans->ctx,
- nft_trans_elem_set(trans),
- &nft_trans_elem(trans),
+ te = (struct nft_trans_elem *)trans->data;
+
+ te->set->ops->activate(te->set, &te->elem);
+ nf_tables_setelem_notify(&trans->ctx, te->set,
+ &te->elem,
NFT_MSG_NEWSETELEM, 0);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
+
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_DELSETELEM, 0);
- te->set->ops->get(te->set, &te->elem);
- nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
- if (te->set->flags & NFT_SET_MAP &&
- !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(&te->elem.data, te->set->dtype);
te->set->ops->remove(te->set, &te->elem);
- nft_trans_destroy(trans);
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
break;
}
}
@@ -3660,6 +3942,10 @@ static void nf_tables_abort_release(struct nft_trans *trans)
case NFT_MSG_NEWSET:
nft_set_destroy(nft_trans_set(trans));
break;
+ case NFT_MSG_NEWSETELEM:
+ nft_set_elem_destroy(nft_trans_elem_set(trans),
+ nft_trans_elem(trans).priv);
+ break;
}
kfree(trans);
}
@@ -3728,18 +4014,17 @@ static int nf_tables_abort(struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- nft_trans_elem_set(trans)->nelems--;
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->get(te->set, &te->elem);
- nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
- if (te->set->flags & NFT_SET_MAP &&
- !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(&te->elem.data, te->set->dtype);
+
te->set->ops->remove(te->set, &te->elem);
- nft_trans_destroy(trans);
+ atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
- nft_trans_elem_set(trans)->nelems++;
+ te = (struct nft_trans_elem *)trans->data;
+
+ te->set->ops->activate(te->set, &te->elem);
+ te->set->ndeact--;
+
nft_trans_destroy(trans);
break;
}
@@ -3814,13 +4099,18 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
- if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_data *data;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
return 0;
- switch (elem->data.verdict) {
+ data = nft_set_ext_data(ext);
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- return nf_tables_check_loops(ctx, elem->data.chain);
+ return nf_tables_check_loops(ctx, data->verdict.chain);
default:
return 0;
}
@@ -3853,10 +4143,11 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
if (data == NULL)
continue;
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- err = nf_tables_check_loops(ctx, data->chain);
+ err = nf_tables_check_loops(ctx,
+ data->verdict.chain);
if (err < 0)
return err;
default:
@@ -3871,7 +4162,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
continue;
list_for_each_entry(binding, &set->bindings, list) {
- if (binding->chain != chain)
+ if (!(binding->flags & NFT_SET_MAP) ||
+ binding->chain != chain)
continue;
iter.skip = 0;
@@ -3889,85 +4181,129 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
}
/**
- * nft_validate_input_register - validate an expressions' input register
+ * nft_parse_register - parse a register value from a netlink attribute
*
- * @reg: the register number
+ * @attr: netlink attribute
*
- * Validate that the input register is one of the general purpose
- * registers.
+ * Parse and translate a register value from a netlink attribute.
+ * Registers used to be 128 bit wide, these register numbers will be
+ * mapped to the corresponding 32 bit register numbers.
*/
-int nft_validate_input_register(enum nft_registers reg)
+unsigned int nft_parse_register(const struct nlattr *attr)
{
- if (reg <= NFT_REG_VERDICT)
- return -EINVAL;
- if (reg > NFT_REG_MAX)
- return -ERANGE;
- return 0;
+ unsigned int reg;
+
+ reg = ntohl(nla_get_be32(attr));
+ switch (reg) {
+ case NFT_REG_VERDICT...NFT_REG_4:
+ return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ default:
+ return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ }
}
-EXPORT_SYMBOL_GPL(nft_validate_input_register);
+EXPORT_SYMBOL_GPL(nft_parse_register);
/**
- * nft_validate_output_register - validate an expressions' output register
+ * nft_dump_register - dump a register value to a netlink attribute
+ *
+ * @skb: socket buffer
+ * @attr: attribute number
+ * @reg: register number
+ *
+ * Construct a netlink attribute containing the register number. For
+ * compatibility reasons, register numbers being a multiple of 4 are
+ * translated to the corresponding 128 bit register numbers.
+ */
+int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
+{
+ if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
+ reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
+ else
+ reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
+
+ return nla_put_be32(skb, attr, htonl(reg));
+}
+EXPORT_SYMBOL_GPL(nft_dump_register);
+
+/**
+ * nft_validate_register_load - validate a load from a register
*
* @reg: the register number
+ * @len: the length of the data
*
- * Validate that the output register is one of the general purpose
- * registers or the verdict register.
+ * Validate that the input register is one of the general purpose
+ * registers and that the length of the load is within the bounds.
*/
-int nft_validate_output_register(enum nft_registers reg)
+int nft_validate_register_load(enum nft_registers reg, unsigned int len)
{
- if (reg < NFT_REG_VERDICT)
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
- if (reg > NFT_REG_MAX)
+ if (len == 0)
+ return -EINVAL;
+ if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
return -ERANGE;
+
return 0;
}
-EXPORT_SYMBOL_GPL(nft_validate_output_register);
+EXPORT_SYMBOL_GPL(nft_validate_register_load);
/**
- * nft_validate_data_load - validate an expressions' data load
+ * nft_validate_register_store - validate an expression's register store
*
* @ctx: context of the expression performing the load
* @reg: the destination register number
* @data: the data to load
* @type: the data type
+ * @len: the length of the data
*
* Validate that a data load uses the appropriate data type for
- * the destination register. A value of NULL for the data means
- * that its runtime gathered data, which is always of type
- * NFT_DATA_VALUE.
+ * the destination register and the length is within the bounds.
+ * A value of NULL for the data means that it is runtime gathered
+ * data.
*/
-int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type)
+int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
{
int err;
switch (reg) {
case NFT_REG_VERDICT:
- if (data == NULL || type != NFT_DATA_VERDICT)
+ if (type != NFT_DATA_VERDICT)
return -EINVAL;
- if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) {
- err = nf_tables_check_loops(ctx, data->chain);
+ if (data != NULL &&
+ (data->verdict.code == NFT_GOTO ||
+ data->verdict.code == NFT_JUMP)) {
+ err = nf_tables_check_loops(ctx, data->verdict.chain);
if (err < 0)
return err;
- if (ctx->chain->level + 1 > data->chain->level) {
+ if (ctx->chain->level + 1 >
+ data->verdict.chain->level) {
if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
return -EMLINK;
- data->chain->level = ctx->chain->level + 1;
+ data->verdict.chain->level = ctx->chain->level + 1;
}
}
return 0;
default:
+ if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
+ return -EINVAL;
+ if (len == 0)
+ return -EINVAL;
+ if (reg * NFT_REG32_SIZE + len >
+ FIELD_SIZEOF(struct nft_regs, data))
+ return -ERANGE;
+
if (data != NULL && type != NFT_DATA_VALUE)
return -EINVAL;
return 0;
}
}
-EXPORT_SYMBOL_GPL(nft_validate_data_load);
+EXPORT_SYMBOL_GPL(nft_validate_register_store);
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
@@ -3988,11 +4324,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
- data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
- switch (data->verdict) {
+ switch (data->verdict.code) {
default:
- switch (data->verdict & NF_VERDICT_MASK) {
+ switch (data->verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
@@ -4018,7 +4354,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return -EOPNOTSUPP;
chain->use++;
- data->chain = chain;
+ data->verdict.chain = chain;
desc->len = sizeof(data);
break;
}
@@ -4029,10 +4365,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- data->chain->use--;
+ data->verdict.chain->use--;
break;
}
}
@@ -4045,13 +4381,14 @@ static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
if (!nest)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
goto nla_put_failure;
- switch (data->verdict) {
+ switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
+ if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
+ data->verdict.chain->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
@@ -4061,7 +4398,8 @@ nla_put_failure:
return -1;
}
-static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
+static int nft_value_init(const struct nft_ctx *ctx,
+ struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla)
{
unsigned int len;
@@ -4069,10 +4407,10 @@ static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
len = nla_len(nla);
if (len == 0)
return -EINVAL;
- if (len > sizeof(data->data))
+ if (len > size)
return -EOVERFLOW;
- nla_memcpy(data->data, nla, sizeof(data->data));
+ nla_memcpy(data->data, nla, len);
desc->type = NFT_DATA_VALUE;
desc->len = len;
return 0;
@@ -4085,8 +4423,7 @@ static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
}
static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
- [NFTA_DATA_VALUE] = { .type = NLA_BINARY,
- .len = FIELD_SIZEOF(struct nft_data, data) },
+ [NFTA_DATA_VALUE] = { .type = NLA_BINARY },
[NFTA_DATA_VERDICT] = { .type = NLA_NESTED },
};
@@ -4095,6 +4432,7 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
*
* @ctx: context of the expression using the data
* @data: destination struct nft_data
+ * @size: maximum data length
* @desc: data description
* @nla: netlink attribute containing data
*
@@ -4104,7 +4442,8 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
* The caller can indicate that it only wants to accept data of type
* NFT_DATA_VALUE by passing NULL for the ctx argument.
*/
-int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
+int nft_data_init(const struct nft_ctx *ctx,
+ struct nft_data *data, unsigned int size,
struct nft_data_desc *desc, const struct nlattr *nla)
{
struct nlattr *tb[NFTA_DATA_MAX + 1];
@@ -4115,7 +4454,8 @@ int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
return err;
if (tb[NFTA_DATA_VALUE])
- return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
+ return nft_value_init(ctx, data, size, desc,
+ tb[NFTA_DATA_VALUE]);
if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
return -EINVAL;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 2d298dccb6dd..f153b07073af 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -8,6 +8,7 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -21,24 +22,66 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
+enum nft_trace {
+ NFT_TRACE_RULE,
+ NFT_TRACE_RETURN,
+ NFT_TRACE_POLICY,
+};
+
+static const char *const comments[] = {
+ [NFT_TRACE_RULE] = "rule",
+ [NFT_TRACE_RETURN] = "return",
+ [NFT_TRACE_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = LOGLEVEL_WARNING,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+static void __nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+
+ nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+ chain->table->name, chain->name, comments[type],
+ rulenum);
+}
+
+static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
+{
+ if (unlikely(pkt->skb->nf_trace))
+ __nft_trace_packet(pkt, chain, rulenum, type);
+}
+
static void nft_cmp_fast_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1])
+ struct nft_regs *regs)
{
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
u32 mask = nft_cmp_fast_mask(priv->len);
- if ((data[priv->sreg].data[0] & mask) == priv->data)
+ if ((regs->data[priv->sreg] & mask) == priv->data)
return;
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static bool nft_payload_fast_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
unsigned char *ptr;
if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
@@ -51,12 +94,13 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
return false;
+ *dest = 0;
if (priv->len == 2)
- *(u16 *)dest->data = *(u16 *)ptr;
+ *(u16 *)dest = *(u16 *)ptr;
else if (priv->len == 4)
- *(u32 *)dest->data = *(u32 *)ptr;
+ *(u32 *)dest = *(u32 *)ptr;
else
- *(u8 *)dest->data = *(u8 *)ptr;
+ *(u8 *)dest = *(u8 *)ptr;
return true;
}
@@ -66,62 +110,25 @@ struct nft_jumpstack {
int rulenum;
};
-enum nft_trace {
- NFT_TRACE_RULE,
- NFT_TRACE_RETURN,
- NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
- [NFT_TRACE_RULE] = "rule",
- [NFT_TRACE_RETURN] = "return",
- [NFT_TRACE_POLICY] = "policy",
-};
-
-static struct nf_loginfo trace_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 4,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static void nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
-{
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
-
- nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
- pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
- chain->table->name, chain->name, comments[type],
- rulenum);
-}
-
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
const struct nft_chain *chain = ops->priv, *basechain = chain;
+ const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
- struct nft_data data[NFT_REG_MAX + 1];
+ struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
struct nft_stats *stats;
int rulenum;
- /*
- * Cache cursor to avoid problems in case that the cursor is updated
- * while traversing the ruleset.
- */
- unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
+ unsigned int gencursor = nft_genmask_cur(net);
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
next_rule:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs.verdict.code = NFT_CONTINUE;
list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
/* This rule is not active, skip. */
@@ -132,62 +139,52 @@ next_rule:
nft_rule_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
- nft_cmp_fast_eval(expr, data);
+ nft_cmp_fast_eval(expr, &regs);
else if (expr->ops != &nft_payload_fast_ops ||
- !nft_payload_fast_eval(expr, data, pkt))
- expr->ops->eval(expr, data, pkt);
+ !nft_payload_fast_eval(expr, &regs, pkt))
+ expr->ops->eval(expr, &regs, pkt);
- if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
+ if (regs.verdict.code != NFT_CONTINUE)
break;
}
- switch (data[NFT_REG_VERDICT].verdict) {
+ switch (regs.verdict.code) {
case NFT_BREAK:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs.verdict.code = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
continue;
}
break;
}
- switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) {
+ switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
- return data[NFT_REG_VERDICT].verdict;
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ return regs.verdict.code;
}
- switch (data[NFT_REG_VERDICT].verdict) {
+ switch (regs.verdict.code) {
case NFT_JUMP:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
-
BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
jumpstack[stackptr].chain = chain;
jumpstack[stackptr].rule = rule;
jumpstack[stackptr].rulenum = rulenum;
stackptr++;
- chain = data[NFT_REG_VERDICT].chain;
- goto do_chain;
+ /* fall through */
case NFT_GOTO:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
- chain = data[NFT_REG_VERDICT].chain;
+ chain = regs.verdict.chain;
goto do_chain;
- case NFT_RETURN:
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
- break;
case NFT_CONTINUE:
- if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN)))
- nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
+ rulenum++;
+ /* fall through */
+ case NFT_RETURN:
+ nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
break;
default:
WARN_ON(1);
@@ -201,8 +198,7 @@ next_rule:
goto next_rule;
}
- if (unlikely(pkt->skb->nf_trace))
- nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+ nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
@@ -244,8 +240,14 @@ int __init nf_tables_core_module_init(void)
if (err < 0)
goto err6;
+ err = nft_dynset_module_init();
+ if (err < 0)
+ goto err7;
+
return 0;
+err7:
+ nft_payload_module_exit();
err6:
nft_byteorder_module_exit();
err5:
@@ -262,6 +264,7 @@ err1:
void nf_tables_core_module_exit(void)
{
+ nft_dynset_module_exit();
nft_payload_module_exit();
nft_byteorder_module_exit();
nft_bitwise_module_exit();
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 11d85b3813f2..3ad91266c821 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -23,6 +23,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
@@ -62,7 +63,7 @@ struct nfulnl_instance {
struct timer_list timer;
struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
- int peer_portid; /* PORTID of the peer process */
+ u32 peer_portid; /* PORTID of the peer process */
/* configurable parameters */
unsigned int flushtimeout; /* timeout until queue flush */
@@ -151,7 +152,7 @@ static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
instance_create(struct net *net, u_int16_t group_num,
- int portid, struct user_namespace *user_ns)
+ u32 portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
struct nfnl_log_net *log = nfnl_log_pernet(net);
@@ -448,14 +449,18 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physindev;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physindev &&
+
+ physindev = nf_bridge_get_physindev(skb);
+ if (physindev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
- htonl(skb->nf_bridge->physindev->ifindex)))
+ htonl(physindev->ifindex)))
goto nla_put_failure;
}
#endif
@@ -479,14 +484,18 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physoutdev;
+
/* Case 2: indev is a bridge group, we need to look
* for physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physoutdev &&
+
+ physoutdev = nf_bridge_get_physoutdev(skb);
+ if (physoutdev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
- htonl(skb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutdev->ifindex)))
goto nla_put_failure;
}
#endif
@@ -539,7 +548,7 @@ __build_packet_message(struct nfnl_log_net *log,
/* UID */
sk = skb->sk;
- if (sk && sk->sk_state != TCP_TIME_WAIT) {
+ if (sk && sk_fullsock(sk)) {
read_lock_bh(&sk->sk_callback_lock);
if (sk->sk_socket && sk->sk_socket->file) {
struct file *file = sk->sk_socket->file;
@@ -998,11 +1007,13 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfulnl_instance *inst = v;
- return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
- inst->group_num,
- inst->peer_portid, inst->qlen,
- inst->copy_mode, inst->copy_range,
- inst->flushtimeout, atomic_read(&inst->use));
+ seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
+ inst->group_num,
+ inst->peer_portid, inst->qlen,
+ inst->copy_mode, inst->copy_range,
+ inst->flushtimeout, atomic_read(&inst->use));
+
+ return 0;
}
static const struct seq_operations nful_seq_ops = {
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 0db8515e76da..0b98c7420239 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -25,6 +25,7 @@
#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
#include <linux/list.h>
@@ -54,7 +55,7 @@ struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
- int peer_portid;
+ u32 peer_portid;
unsigned int queue_maxlen;
unsigned int copy_range;
unsigned int queue_dropped;
@@ -109,8 +110,7 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
- int portid)
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
{
struct nfqnl_instance *inst;
unsigned int h;
@@ -257,7 +257,7 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
{
const struct cred *cred;
- if (sk->sk_state == TCP_TIME_WAIT)
+ if (!sk_fullsock(sk))
return 0;
read_lock_bh(&sk->sk_callback_lock);
@@ -314,13 +314,13 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (entskb->tstamp.tv64)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
- if (entry->hook <= NF_INET_FORWARD ||
- (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
+ if (entry->state.hook <= NF_INET_FORWARD ||
+ (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
csum_verify = !skb_csum_unnecessary(entskb);
else
csum_verify = false;
- outdev = entry->outdev;
+ outdev = entry->state.out;
switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
case NFQNL_COPY_META:
@@ -368,23 +368,23 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
return NULL;
}
nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = entry->pf;
+ nfmsg->nfgen_family = entry->state.pf;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(queue->queue_num);
nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
pmsg = nla_data(nla);
pmsg->hw_protocol = entskb->protocol;
- pmsg->hook = entry->hook;
+ pmsg->hook = entry->state.hook;
*packet_id_ptr = &pmsg->packet_id;
- indev = entry->indev;
+ indev = entry->state.in;
if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
goto nla_put_failure;
#else
- if (entry->pf == PF_BRIDGE) {
+ if (entry->state.pf == PF_BRIDGE) {
/* Case 1: indev is physical input device, we need to
* look for bridge group (when called from
* netfilter_bridge) */
@@ -396,14 +396,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physinif;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physindev &&
+
+ physinif = nf_bridge_get_physinif(entskb);
+ if (physinif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
- htonl(entskb->nf_bridge->physindev->ifindex)))
+ htonl(physinif)))
goto nla_put_failure;
}
#endif
@@ -414,7 +418,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
goto nla_put_failure;
#else
- if (entry->pf == PF_BRIDGE) {
+ if (entry->state.pf == PF_BRIDGE) {
/* Case 1: outdev is physical output device, we need to
* look for bridge group (when called from
* netfilter_bridge) */
@@ -426,14 +430,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physoutif;
+
/* Case 2: outdev is bridge group, we need to look for
* physical output device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physoutdev &&
+
+ physoutif = nf_bridge_get_physoutif(entskb);
+ if (physoutif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
- htonl(entskb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutif)))
goto nla_put_failure;
}
#endif
@@ -633,8 +641,8 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
struct nfqnl_instance *queue;
struct sk_buff *skb, *segs;
int err = -ENOBUFS;
- struct net *net = dev_net(entry->indev ?
- entry->indev : entry->outdev);
+ struct net *net = dev_net(entry->state.in ?
+ entry->state.in : entry->state.out);
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
/* rcu_read_lock()ed by nf_hook_slow() */
@@ -647,7 +655,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
skb = entry->skb;
- switch (entry->pf) {
+ switch (entry->state.pf) {
case NFPROTO_IPV4:
skb->protocol = htons(ETH_P_IP);
break;
@@ -757,19 +765,20 @@ nfqnl_set_mode(struct nfqnl_instance *queue,
static int
dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
{
- if (entry->indev)
- if (entry->indev->ifindex == ifindex)
+ if (entry->state.in)
+ if (entry->state.in->ifindex == ifindex)
return 1;
- if (entry->outdev)
- if (entry->outdev->ifindex == ifindex)
+ if (entry->state.out)
+ if (entry->state.out->ifindex == ifindex)
return 1;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- if (entry->skb->nf_bridge->physindev &&
- entry->skb->nf_bridge->physindev->ifindex == ifindex)
- return 1;
- if (entry->skb->nf_bridge->physoutdev &&
- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+ int physinif, physoutif;
+
+ physinif = nf_bridge_get_physinif(entry->skb);
+ physoutif = nf_bridge_get_physoutif(entry->skb);
+
+ if (physinif == ifindex || physoutif == ifindex)
return 1;
}
#endif
@@ -860,7 +869,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
};
static struct nfqnl_instance *
-verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
{
struct nfqnl_instance *queue;
@@ -1242,7 +1251,7 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfqnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
inst->queue_num,
inst->peer_portid, inst->queue_total,
inst->copy_mode, inst->copy_range,
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 4fb6ee2c1106..d71cc18fa35d 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -26,18 +26,16 @@ struct nft_bitwise {
};
static void nft_bitwise_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- const struct nft_data *src = &data[priv->sreg];
- struct nft_data *dst = &data[priv->dreg];
+ const u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
- dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
- priv->xor.data[i];
- }
+ for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+ dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
}
static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
@@ -63,28 +61,27 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_XOR] == NULL)
return -EINVAL;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+ priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
+ err = nft_validate_register_load(priv->sreg, priv->len);
if (err < 0)
return err;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
- err = nft_validate_output_register(priv->dreg);
+ priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
if (err < 0)
return err;
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
- priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
- err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
+ err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1,
+ tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
if (d1.len != priv->len)
return -EINVAL;
- err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
+ err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2,
+ tb[NFTA_BITWISE_XOR]);
if (err < 0)
return err;
if (d2.len != priv->len)
@@ -97,9 +94,9 @@ static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index c39ed8d29df1..fde5145f2e36 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -26,16 +26,17 @@ struct nft_byteorder {
};
static void nft_byteorder_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
+ u32 *src = &regs->data[priv->sreg];
+ u32 *dst = &regs->data[priv->dreg];
union { u32 u32; u16 u16; } *s, *d;
unsigned int i;
- s = (void *)src->data;
- d = (void *)dst->data;
+ s = (void *)src;
+ d = (void *)dst;
switch (priv->size) {
case 4:
@@ -87,19 +88,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
tb[NFTA_BYTEORDER_OP] == NULL)
return -EINVAL;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
- err = nft_validate_input_register(priv->sreg);
- if (err < 0)
- return err;
-
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
@@ -109,10 +97,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
- if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
- return -EINVAL;
-
priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
switch (priv->size) {
case 2:
@@ -122,16 +106,24 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- return 0;
+ priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
+ priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+ err = nft_validate_register_load(priv->sreg, priv->len);
+ if (err < 0)
+ return err;
+
+ priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index e2b3f51c81f1..e25b35d70e4d 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -25,13 +25,13 @@ struct nft_cmp_expr {
};
static void nft_cmp_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_cmp_expr *priv = nft_expr_priv(expr);
int d;
- d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
+ d = memcmp(&regs->data[priv->sreg], &priv->data, priv->len);
switch (priv->op) {
case NFT_CMP_EQ:
if (d != 0)
@@ -59,7 +59,7 @@ static void nft_cmp_eval(const struct nft_expr *expr,
return;
mismatch:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
@@ -75,12 +75,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct nft_data_desc desc;
int err;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
- priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
-
- err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc,
+ tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
+ priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
+ err = nft_validate_register_load(priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
priv->len = desc.len;
return 0;
}
@@ -89,7 +93,7 @@ static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_cmp_expr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
goto nla_put_failure;
@@ -122,13 +126,18 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
u32 mask;
int err;
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
-
- err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, sizeof(data), &desc,
+ tb[NFTA_CMP_DATA]);
BUG_ON(err < 0);
- desc.len *= BITS_PER_BYTE;
+ priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
+ err = nft_validate_register_load(priv->sreg, desc.len);
+ if (err < 0)
+ return err;
+
+ desc.len *= BITS_PER_BYTE;
mask = nft_cmp_fast_mask(desc.len);
+
priv->data = data.data[0] & mask;
priv->len = desc.len;
return 0;
@@ -139,7 +148,7 @@ static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
struct nft_data data;
- if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ)))
goto nla_put_failure;
@@ -167,7 +176,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
{
struct nft_data_desc desc;
struct nft_data data;
- enum nft_registers sreg;
enum nft_cmp_ops op;
int err;
@@ -176,11 +184,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
tb[NFTA_CMP_DATA] == NULL)
return ERR_PTR(-EINVAL);
- sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
- err = nft_validate_input_register(sreg);
- if (err < 0)
- return ERR_PTR(err);
-
op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
switch (op) {
case NFT_CMP_EQ:
@@ -194,7 +197,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
return ERR_PTR(-EINVAL);
}
- err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
+ err = nft_data_init(NULL, &data, sizeof(data), &desc,
+ tb[NFTA_CMP_DATA]);
if (err < 0)
return ERR_PTR(err);
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 65f3e2b6be44..7f29cfc76349 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -20,6 +20,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -42,6 +43,7 @@ union nft_entry {
struct ipt_entry e4;
struct ip6t_entry e6;
struct ebt_entry ebt;
+ struct arpt_entry arp;
};
static inline void
@@ -53,7 +55,7 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
}
static void nft_target_eval_xt(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -70,16 +72,16 @@ static void nft_target_eval_xt(const struct nft_expr *expr,
switch (ret) {
case XT_CONTINUE:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs->verdict.code = NFT_CONTINUE;
break;
default:
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
break;
}
}
static void nft_target_eval_bridge(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -96,19 +98,19 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
switch (ret) {
case EBT_ACCEPT:
- data[NFT_REG_VERDICT].verdict = NF_ACCEPT;
+ regs->verdict.code = NF_ACCEPT;
break;
case EBT_DROP:
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
break;
case EBT_CONTINUE:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ regs->verdict.code = NFT_CONTINUE;
break;
case EBT_RETURN:
- data[NFT_REG_VERDICT].verdict = NFT_RETURN;
+ regs->verdict.code = NFT_RETURN;
break;
default:
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
break;
}
}
@@ -143,6 +145,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
entry->ebt.ethproto = (__force __be16)proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
+ case NFPROTO_ARP:
+ break;
}
par->entryinfo = entry;
par->target = target;
@@ -300,7 +304,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
}
static void nft_match_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
void *info = nft_expr_priv(expr);
@@ -313,16 +317,16 @@ static void nft_match_eval(const struct nft_expr *expr,
ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
if (pkt->xt.hotdrop) {
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+ regs->verdict.code = NF_DROP;
return;
}
- switch(ret) {
- case true:
- data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
+ switch (ret ? 1 : 0) {
+ case 1:
+ regs->verdict.code = NFT_CONTINUE;
break;
- case false:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ case 0:
+ regs->verdict.code = NFT_BREAK;
break;
}
}
@@ -357,6 +361,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
entry->ebt.ethproto = (__force __be16)proto;
entry->ebt.invflags = inv ? EBT_IPROTO : 0;
break;
+ case NFPROTO_ARP:
+ break;
}
par->entryinfo = entry;
par->match = match;
@@ -543,6 +549,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
case NFPROTO_BRIDGE:
fmt = "ebt_%s";
break;
+ case NFPROTO_ARP:
+ fmt = "arpt_%s";
+ break;
default:
pr_err("nft_compat: unsupported protocol %d\n",
nfmsg->nfgen_family);
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index c89ee486ce54..17591239229f 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -24,7 +24,7 @@ struct nft_counter {
};
static void nft_counter_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_counter *priv = nft_expr_priv(expr);
@@ -92,6 +92,7 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
.ops = &nft_counter_ops,
.policy = nft_counter_policy,
.maxattr = NFTA_COUNTER_MAX,
+ .flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index cc5603016242..8cbca3432f90 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -31,11 +31,11 @@ struct nft_ct {
};
static void nft_ct_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
const struct nf_conn_help *help;
@@ -54,8 +54,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
state = NF_CT_STATE_UNTRACKED_BIT;
else
state = NF_CT_STATE_BIT(ctinfo);
- dest->data[0] = state;
+ *dest = state;
return;
+ default:
+ break;
}
if (ct == NULL)
@@ -63,26 +65,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_CT_DIRECTION:
- dest->data[0] = CTINFO2DIR(ctinfo);
+ *dest = CTINFO2DIR(ctinfo);
return;
case NFT_CT_STATUS:
- dest->data[0] = ct->status;
+ *dest = ct->status;
return;
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
- dest->data[0] = ct->mark;
+ *dest = ct->mark;
return;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
- dest->data[0] = ct->secmark;
+ *dest = ct->secmark;
return;
#endif
case NFT_CT_EXPIRATION:
diff = (long)jiffies - (long)ct->timeout.expires;
if (diff < 0)
diff = 0;
- dest->data[0] = jiffies_to_msecs(diff);
+ *dest = jiffies_to_msecs(diff);
return;
case NFT_CT_HELPER:
if (ct->master == NULL)
@@ -93,9 +95,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
helper = rcu_dereference(help->helper);
if (helper == NULL)
goto err;
- if (strlen(helper->name) >= sizeof(dest->data))
- goto err;
- strncpy((char *)dest->data, helper->name, sizeof(dest->data));
+ strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
@@ -103,58 +103,60 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
unsigned int size;
if (!labels) {
- memset(dest->data, 0, sizeof(dest->data));
+ memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
return;
}
- BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data));
size = labels->words * sizeof(long);
-
- memcpy(dest->data, labels->bits, size);
- if (size < sizeof(dest->data))
- memset(((char *) dest->data) + size, 0,
- sizeof(dest->data) - size);
+ memcpy(dest, labels->bits, size);
+ if (size < NF_CT_LABELS_MAX_SIZE)
+ memset(((char *) dest) + size, 0,
+ NF_CT_LABELS_MAX_SIZE - size);
return;
}
#endif
+ default:
+ break;
}
tuple = &ct->tuplehash[priv->dir].tuple;
switch (priv->key) {
case NFT_CT_L3PROTOCOL:
- dest->data[0] = nf_ct_l3num(ct);
+ *dest = nf_ct_l3num(ct);
return;
case NFT_CT_SRC:
- memcpy(dest->data, tuple->src.u3.all,
+ memcpy(dest, tuple->src.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
case NFT_CT_DST:
- memcpy(dest->data, tuple->dst.u3.all,
+ memcpy(dest, tuple->dst.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
case NFT_CT_PROTOCOL:
- dest->data[0] = nf_ct_protonum(ct);
+ *dest = nf_ct_protonum(ct);
return;
case NFT_CT_PROTO_SRC:
- dest->data[0] = (__force __u16)tuple->src.u.all;
+ *dest = (__force __u16)tuple->src.u.all;
return;
case NFT_CT_PROTO_DST:
- dest->data[0] = (__force __u16)tuple->dst.u.all;
+ *dest = (__force __u16)tuple->dst.u.all;
return;
+ default:
+ break;
}
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static void nft_ct_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_ct *priv = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
#ifdef CONFIG_NF_CONNTRACK_MARK
- u32 value = data[priv->sreg].data[0];
+ u32 value = regs->data[priv->sreg];
#endif
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
@@ -172,6 +174,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
}
break;
#endif
+ default:
+ break;
}
}
@@ -220,12 +224,17 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
- case NFT_CT_STATE:
case NFT_CT_DIRECTION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u8);
+ break;
+ case NFT_CT_STATE:
case NFT_CT_STATUS:
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
@@ -233,22 +242,54 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
#ifdef CONFIG_NF_CONNTRACK_SECMARK
case NFT_CT_SECMARK:
#endif
+ case NFT_CT_EXPIRATION:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = sizeof(u32);
+ break;
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS:
+ if (tb[NFTA_CT_DIRECTION] != NULL)
+ return -EINVAL;
+ len = NF_CT_LABELS_MAX_SIZE;
+ break;
#endif
- case NFT_CT_EXPIRATION:
case NFT_CT_HELPER:
if (tb[NFTA_CT_DIRECTION] != NULL)
return -EINVAL;
+ len = NF_CT_HELPER_NAME_LEN;
break;
+
case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+ len = sizeof(u8);
+ break;
case NFT_CT_SRC:
case NFT_CT_DST:
+ if (tb[NFTA_CT_DIRECTION] == NULL)
+ return -EINVAL;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_IPV4:
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ src.u3.ip);
+ break;
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ src.u3.ip6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+ break;
case NFT_CT_PROTO_SRC:
case NFT_CT_PROTO_DST:
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
+ len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
break;
default:
return -EOPNOTSUPP;
@@ -265,12 +306,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
}
}
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
if (err < 0)
return err;
@@ -286,20 +324,22 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
+ len = FIELD_SIZEOF(struct nf_conn, mark);
break;
#endif
default:
return -EOPNOTSUPP;
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
+ err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
return err;
@@ -320,7 +360,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
@@ -347,7 +387,7 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_ct *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_CT_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
new file mode 100644
index 000000000000..513a8ef60a59
--- /dev/null
+++ b/net/netfilter/nft_dynset.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+/*
+ * Private data of a dynset expression.
+ *
+ * set:       set the expression operates on; assigned once init succeeded
+ * tmpl:      extension template prepared in init and used to build new elements
+ * op:        operation taken from NFTA_DYNSET_OP (add or update)
+ * sreg_key:  register holding the element key (validated against set->klen)
+ * sreg_data: register holding the element data; only parsed for map sets
+ *            (validated against set->dlen)
+ * timeout:   value of NFTA_DYNSET_TIMEOUT, 0 when the attribute is absent, in
+ *            which case the set's own timeout is used instead
+ * expr:      optional expression built from NFTA_DYNSET_EXPR and cloned into
+ *            every new element
+ * binding:   binding handed to nf_tables_bind_set()/nf_tables_unbind_set()
+ */
+struct nft_dynset {
+ struct nft_set *set;
+ struct nft_set_ext_tmpl tmpl;
+ enum nft_dynset_ops op:8;
+ enum nft_registers sreg_key:8;
+ enum nft_registers sreg_data:8;
+ u64 timeout;
+ struct nft_expr *expr;
+ struct nft_set_binding binding;
+};
+
+/*
+ * Element constructor passed to set->ops->update() by nft_dynset_eval().
+ *
+ * Reserves a slot when the set has a size limit, builds a new element from
+ * the key and data registers and, when the expression carries one, clones the
+ * configured expression into the element's expression extension.
+ *
+ * Returns the new element, or NULL if the set is full or the element could
+ * not be created.
+ */
+static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
+			    struct nft_regs *regs)
+{
+	const struct nft_dynset *dyn = nft_expr_priv(expr);
+	struct nft_set_ext *elem_ext;
+	void *new_elem;
+	u64 tmo;
+
+	/* Reserve a slot first; bail out if the size limit is reached. */
+	if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
+		return NULL;
+
+	/* A timeout configured on the expression overrides the set's one. */
+	tmo = dyn->timeout ? : set->timeout;
+	new_elem = nft_set_elem_init(set, &dyn->tmpl,
+				     &regs->data[dyn->sreg_key],
+				     &regs->data[dyn->sreg_data],
+				     tmo, GFP_ATOMIC);
+	if (new_elem != NULL) {
+		elem_ext = nft_set_elem_ext(set, new_elem);
+		if (dyn->expr != NULL)
+			nft_expr_clone(nft_set_ext_expr(elem_ext), dyn->expr);
+		return new_elem;
+	}
+
+	/* Element creation failed: hand the reserved slot back. */
+	if (set->size)
+		atomic_dec(&set->nelems);
+	return NULL;
+}
+
+/*
+ * Evaluate a dynset expression for one packet.
+ *
+ * Asks the set backend to update (or create, via nft_dynset_new()) the
+ * element keyed by the key register. On success, an update operation on an
+ * element with an expiration extension gets its expiration rewritten, and an
+ * expression attached to the element is evaluated. The verdict is set to
+ * NFT_BREAK when the backend update fails, or when the expiration was not
+ * rewritten and the element carries no expression.
+ */
+static void nft_dynset_eval(const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_pktinfo *pkt)
+{
+	const struct nft_dynset *dyn = nft_expr_priv(expr);
+	struct nft_set *set = dyn->set;
+	const struct nft_set_ext *ext;
+	const struct nft_expr *elem_expr;
+	u64 tmo;
+
+	if (!set->ops->update(set, &regs->data[dyn->sreg_key], nft_dynset_new,
+			      expr, regs, &ext)) {
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	elem_expr = nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) ?
+		    nft_set_ext_expr(ext) : NULL;
+
+	if (dyn->op == NFT_DYNSET_OP_UPDATE &&
+	    nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+		/* Expression timeout takes precedence over the set's one. */
+		tmo = dyn->timeout ? : set->timeout;
+		*nft_set_ext_expiration(ext) = jiffies + tmo;
+	} else if (elem_expr == NULL) {
+		/* Nothing refreshed and nothing to evaluate. */
+		regs->verdict.code = NFT_BREAK;
+		return;
+	}
+
+	if (elem_expr != NULL)
+		elem_expr->ops->eval(elem_expr, regs, pkt);
+}
+
+/* Netlink policy: expected type of each NFTA_DYNSET_* attribute. */
+static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
+ [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
+ [NFTA_DYNSET_OP] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
+ [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED },
+};
+
+/*
+ * Parse and validate the netlink attributes of a dynset expression.
+ *
+ * Looks up the target set by name (falling back to NFTA_DYNSET_SET_ID when
+ * the name lookup fails), checks the requested operation, timeout, source
+ * registers and optional per-element expression against the set's flags,
+ * prepares the element extension template and binds the expression to the
+ * set.
+ *
+ * Returns 0 on success or a negative errno. An expression created from
+ * NFTA_DYNSET_EXPR is destroyed again on the failure paths that follow its
+ * creation.
+ */
+static int nft_dynset_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+	struct nft_set *set;
+	u64 timeout;
+	int err;
+
+	if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
+	    tb[NFTA_DYNSET_OP] == NULL ||
+	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
+		return -EINVAL;
+
+	set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+	if (IS_ERR(set)) {
+		/* Fall back to the set ID when the name lookup failed. */
+		if (tb[NFTA_DYNSET_SET_ID])
+			set = nf_tables_set_lookup_byid(ctx->net,
+							tb[NFTA_DYNSET_SET_ID]);
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
+
+	/*
+	 * nft_dynset_eval() calls set->ops->update() unconditionally, so
+	 * refuse sets whose backend does not provide that callback.
+	 */
+	if (set->ops->update == NULL)
+		return -EOPNOTSUPP;
+
+	if (set->flags & NFT_SET_CONSTANT)
+		return -EBUSY;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+	switch (priv->op) {
+	case NFT_DYNSET_OP_ADD:
+		break;
+	case NFT_DYNSET_OP_UPDATE:
+		/* Updating only makes sense for sets with timeouts. */
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EOPNOTSUPP;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	timeout = 0;
+	if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EINVAL;
+		timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+	}
+
+	priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
+	err = nft_validate_register_load(priv->sreg_key, set->klen);
+	if (err < 0)
+		return err;
+
+	/* A data register is mandatory for maps and forbidden otherwise. */
+	if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+		if (!(set->flags & NFT_SET_MAP))
+			return -EINVAL;
+		if (set->dtype == NFT_DATA_VERDICT)
+			return -EOPNOTSUPP;
+
+		priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]);
+		err = nft_validate_register_load(priv->sreg_data, set->dlen);
+		if (err < 0)
+			return err;
+	} else if (set->flags & NFT_SET_MAP)
+		return -EINVAL;
+
+	/* An expression is mandatory for eval sets and forbidden otherwise. */
+	if (tb[NFTA_DYNSET_EXPR] != NULL) {
+		if (!(set->flags & NFT_SET_EVAL))
+			return -EINVAL;
+		if (!(set->flags & NFT_SET_ANONYMOUS))
+			return -EOPNOTSUPP;
+
+		priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
+		if (IS_ERR(priv->expr))
+			return PTR_ERR(priv->expr);
+
+		/* Only expressions flagged as stateful are accepted. */
+		err = -EOPNOTSUPP;
+		if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
+			goto err1;
+	} else if (set->flags & NFT_SET_EVAL)
+		return -EINVAL;
+
+	/* Describe the extensions every element created by eval carries. */
+	nft_set_ext_prepare(&priv->tmpl);
+	nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
+	if (set->flags & NFT_SET_MAP)
+		nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+	if (priv->expr != NULL)
+		nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR,
+				       priv->expr->ops->size);
+	if (set->flags & NFT_SET_TIMEOUT) {
+		if (timeout || set->timeout)
+			nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+	}
+
+	priv->timeout = timeout;
+
+	err = nf_tables_bind_set(ctx, set, &priv->binding);
+	if (err < 0)
+		goto err1;
+
+	priv->set = set;
+	return 0;
+
+err1:
+	if (priv->expr != NULL)
+		nft_expr_destroy(ctx, priv->expr);
+	return err;
+}
+
+/*
+ * Release what nft_dynset_init() set up: drop the binding to the set and
+ * destroy the optional per-element expression.
+ */
+static void nft_dynset_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+ if (priv->expr != NULL)
+ nft_expr_destroy(ctx, priv->expr);
+}
+
+/*
+ * Dump the expression's configuration as netlink attributes into skb.
+ *
+ * The data register is only emitted for map sets and the nested expression
+ * only when one is configured. Returns 0 on success, -1 as soon as one
+ * attribute cannot be added.
+ */
+static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_dynset *dyn = nft_expr_priv(expr);
+
+	/* Short-circuit evaluation keeps the attribute order and stops at
+	 * the first failure.
+	 */
+	if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, dyn->sreg_key) ||
+	    ((dyn->set->flags & NFT_SET_MAP) &&
+	     nft_dump_register(skb, NFTA_DYNSET_SREG_DATA, dyn->sreg_data)) ||
+	    nla_put_be32(skb, NFTA_DYNSET_OP, htonl(dyn->op)) ||
+	    nla_put_string(skb, NFTA_DYNSET_SET_NAME, dyn->set->name) ||
+	    nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(dyn->timeout)) ||
+	    (dyn->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, dyn->expr)))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Operations of the dynset expression. The type is forward-declared because
+ * the ops and the type descriptor point at each other.
+ */
+static struct nft_expr_type nft_dynset_type;
+static const struct nft_expr_ops nft_dynset_ops = {
+ .type = &nft_dynset_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
+ .eval = nft_dynset_eval,
+ .init = nft_dynset_init,
+ .destroy = nft_dynset_destroy,
+ .dump = nft_dynset_dump,
+};
+
+/* Expression type descriptor registered under the name "dynset". */
+static struct nft_expr_type nft_dynset_type __read_mostly = {
+ .name = "dynset",
+ .ops = &nft_dynset_ops,
+ .policy = nft_dynset_policy,
+ .maxattr = NFTA_DYNSET_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Register the dynset expression type; returns nft_register_expr()'s result. */
+int __init nft_dynset_module_init(void)
+{
+ return nft_register_expr(&nft_dynset_type);
+}
+
+/* Unregister the dynset expression type again. */
+void nft_dynset_module_exit(void)
+{
+ nft_unregister_expr(&nft_dynset_type);
+}
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
deleted file mode 100644
index b6eed4d5a096..000000000000
--- a/net/netfilter/nft_expr_template.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-
-struct nft_template {
-
-};
-
-static void nft_template_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
-}
-
-static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
- [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 },
-};
-
-static int nft_template_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
- return 0;
-}
-
-static void nft_template_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
-{
- struct nft_template *priv = nft_expr_priv(expr);
-
-}
-
-static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
-{
- const struct nft_template *priv = nft_expr_priv(expr);
-
- NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field);
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
-static struct nft_expr_type nft_template_type;
-static const struct nft_expr_ops nft_template_ops = {
- .type = &nft_template_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_template)),
- .eval = nft_template_eval,
- .init = nft_template_init,
- .destroy = nft_template_destroy,
- .dump = nft_template_dump,
-};
-
-static struct nft_expr_type nft_template_type __read_mostly = {
- .name = "template",
- .ops = &nft_template_ops,
- .policy = nft_template_policy,
- .maxattr = NFTA_TEMPLATE_MAX,
- .owner = THIS_MODULE,
-};
-
-static int __init nft_template_module_init(void)
-{
- return nft_register_expr(&nft_template_type);
-}
-
-static void __exit nft_template_module_exit(void)
-{
- nft_unregister_expr(&nft_template_type);
-}
-
-module_init(nft_template_module_init);
-module_exit(nft_template_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 55c939f5371f..ba7aed13e174 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -26,11 +26,11 @@ struct nft_exthdr {
};
static void nft_exthdr_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
unsigned int offset = 0;
int err;
@@ -39,11 +39,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
goto err;
offset += priv->offset;
- if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
@@ -58,7 +59,6 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- int err;
if (tb[NFTA_EXTHDR_DREG] == NULL ||
tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -69,22 +69,17 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
- if (priv->len == 0 ||
- priv->len > FIELD_SIZEOF(struct nft_data, data))
- return -EINVAL;
+ priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_exthdr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
goto nla_put_failure;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 37c15e674884..3f9d45d3d9b7 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,6 +15,7 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
+#include <linux/workqueue.h>
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -23,119 +24,175 @@
/* We target a hash table size of 4, element hint is 75% of final size */
#define NFT_HASH_ELEMENT_HINT 3
+struct nft_hash {
+ struct rhashtable ht;
+ struct delayed_work gc_work;
+};
+
struct nft_hash_elem {
struct rhash_head node;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_set_ext ext;
+};
+
+struct nft_hash_cmp_arg {
+ const struct nft_set *set;
+ const u32 *key;
+ u8 genmask;
};
-static bool nft_hash_lookup(const struct nft_set *set,
- const struct nft_data *key,
- struct nft_data *data)
+static const struct rhashtable_params nft_hash_params;
+
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+{
+ const struct nft_hash_cmp_arg *arg = data;
+
+ return jhash(arg->key, len, seed);
+}
+
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
{
- struct rhashtable *priv = nft_set_priv(set);
+ const struct nft_hash_elem *he = data;
+
+ return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_hash_cmp_arg *x = arg->key;
+ const struct nft_hash_elem *he = ptr;
+
+ if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+ return 1;
+ if (nft_set_elem_expired(&he->ext))
+ return 1;
+ if (!nft_set_elem_active(&he->ext, x->genmask))
+ return 1;
+ return 0;
+}
+
+static bool nft_hash_lookup(const struct nft_set *set, const u32 *key,
+ const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_cur(read_pnet(&set->pnet)),
+ .set = set,
+ .key = key,
+ };
- he = rhashtable_lookup(priv, key);
- if (he && set->flags & NFT_SET_MAP)
- nft_data_copy(data, he->data);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ *ext = &he->ext;
return !!he;
}
-static int nft_hash_insert(const struct nft_set *set,
- const struct nft_set_elem *elem)
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_regs *regs),
+ const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_set_ext **ext)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
- unsigned int size;
-
- if (elem->flags != 0)
- return -EINVAL;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
- size = sizeof(*he);
- if (set->flags & NFT_SET_MAP)
- size += sizeof(he->data[0]);
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ goto out;
- he = kzalloc(size, GFP_KERNEL);
+ he = new(set, expr, regs);
if (he == NULL)
- return -ENOMEM;
-
- nft_data_copy(&he->key, &elem->key);
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(he->data, &elem->data);
-
- rhashtable_insert(priv, &he->node);
+ goto err1;
+ if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params))
+ goto err2;
+out:
+ *ext = &he->ext;
+ return true;
- return 0;
+err2:
+ nft_set_elem_destroy(set, he);
+err1:
+ return false;
}
-static void nft_hash_elem_destroy(const struct nft_set *set,
- struct nft_hash_elem *he)
+static int nft_hash_insert(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- nft_data_uninit(&he->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
- nft_data_uninit(he->data, set->dtype);
- kfree(he);
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .set = set,
+ .key = elem->key.val.data,
+ };
+
+ return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params);
}
-static void nft_hash_remove(const struct nft_set *set,
- const struct nft_set_elem *elem)
+static void nft_hash_activate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
- rhashtable_remove(priv, elem->cookie);
- synchronize_rcu();
- kfree(elem->cookie);
+ nft_set_elem_change_active(set, &he->ext);
+ nft_set_elem_clear_busy(&he->ext);
}
-struct nft_compare_arg {
- const struct nft_set *set;
- struct nft_set_elem *elem;
-};
-
-static bool nft_hash_compare(void *ptr, void *arg)
+static void *nft_hash_deactivate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct nft_hash_elem *he = ptr;
- struct nft_compare_arg *x = arg;
-
- if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) {
- x->elem->cookie = he;
- x->elem->flags = 0;
- if (x->set->flags & NFT_SET_MAP)
- nft_data_copy(&x->elem->data, he->data);
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .set = set,
+ .key = elem->key.val.data,
+ };
- return true;
+ rcu_read_lock();
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL) {
+ if (!nft_set_elem_mark_busy(&he->ext))
+ nft_set_elem_change_active(set, &he->ext);
+ else
+ he = NULL;
}
+ rcu_read_unlock();
- return false;
+ return he;
}
-static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void nft_hash_remove(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
- struct rhashtable *priv = nft_set_priv(set);
- struct nft_compare_arg arg = {
- .set = set,
- .elem = elem,
- };
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he = elem->priv;
- if (rhashtable_lookup_compare(priv, &elem->key,
- &nft_hash_compare, &arg))
- return 0;
-
- return -ENOENT;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
}
static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_iter *iter)
{
- struct rhashtable *priv = nft_set_priv(set);
- const struct nft_hash_elem *he;
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
struct rhashtable_iter hti;
struct nft_set_elem elem;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int err;
- err = rhashtable_walk_init(priv, &hti);
+ err = rhashtable_walk_init(&priv->ht, &hti);
iter->err = err;
if (err)
return;
@@ -159,11 +216,12 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
+ if (nft_set_elem_expired(&he->ext))
+ goto cont;
+ if (!nft_set_elem_active(&he->ext, genmask))
+ goto cont;
- memcpy(&elem.key, &he->key, sizeof(elem.key));
- if (set->flags & NFT_SET_MAP)
- memcpy(&elem.data, he->data, sizeof(elem.data));
- elem.flags = 0;
+ elem.priv = he;
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0)
@@ -178,47 +236,102 @@ out:
rhashtable_walk_exit(&hti);
}
+static void nft_hash_gc(struct work_struct *work)
+{
+ struct nft_set *set;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
+ int err;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = nft_set_container_of(priv);
+
+ err = rhashtable_walk_init(&priv->ht, &hti);
+ if (err)
+ goto schedule;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN)
+ goto out;
+ continue;
+ }
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ if (nft_set_elem_mark_busy(&he->ext))
+ continue;
+
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb == NULL)
+ goto out;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, he);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ nft_set_gc_batch_complete(gcb);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
- return sizeof(struct rhashtable);
+ return sizeof(struct nft_hash);
}
+static const struct rhashtable_params nft_hash_params = {
+ .head_offset = offsetof(struct nft_hash_elem, node),
+ .hashfn = nft_hash_key,
+ .obj_hashfn = nft_hash_obj,
+ .obj_cmpfn = nft_hash_cmp,
+ .automatic_shrinking = true,
+};
+
static int nft_hash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
{
- struct rhashtable *priv = nft_set_priv(set);
- struct rhashtable_params params = {
- .nelem_hint = desc->size ? : NFT_HASH_ELEMENT_HINT,
- .head_offset = offsetof(struct nft_hash_elem, node),
- .key_offset = offsetof(struct nft_hash_elem, key),
- .key_len = set->klen,
- .hashfn = jhash,
- };
+ struct nft_hash *priv = nft_set_priv(set);
+ struct rhashtable_params params = nft_hash_params;
+ int err;
- return rhashtable_init(priv, &params);
+ params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+ params.key_len = set->klen;
+
+ err = rhashtable_init(&priv->ht, &params);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_elem_destroy(void *ptr, void *arg)
{
- struct rhashtable *priv = nft_set_priv(set);
- const struct bucket_table *tbl;
- struct nft_hash_elem *he;
- struct rhash_head *pos, *next;
- unsigned int i;
-
- /* Stop an eventual async resizing */
- priv->being_destroyed = true;
- mutex_lock(&priv->mutex);
+ nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+}
- tbl = rht_dereference(priv->tbl, priv);
- for (i = 0; i < tbl->size; i++) {
- rht_for_each_entry_safe(he, pos, next, tbl, i, node)
- nft_hash_elem_destroy(set, he);
- }
- mutex_unlock(&priv->mutex);
+static void nft_hash_destroy(const struct nft_set *set)
+{
+ struct nft_hash *priv = nft_set_priv(set);
- rhashtable_destroy(priv);
+ cancel_delayed_work_sync(&priv->gc_work);
+ rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+ (void *)set);
}
static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
@@ -227,11 +340,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
unsigned int esize;
esize = sizeof(struct nft_hash_elem);
- if (features & NFT_SET_MAP)
- esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
-
if (desc->size) {
- est->size = sizeof(struct rhashtable) +
+ est->size = sizeof(struct nft_hash) +
roundup_pow_of_two(desc->size * 4 / 3) *
sizeof(struct nft_hash_elem *) +
desc->size * esize;
@@ -251,15 +361,18 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
static struct nft_set_ops nft_hash_ops __read_mostly = {
.privsize = nft_hash_privsize,
+ .elemsize = offsetof(struct nft_hash_elem, ext),
.estimate = nft_hash_estimate,
.init = nft_hash_init,
.destroy = nft_hash_destroy,
- .get = nft_hash_get,
.insert = nft_hash_insert,
+ .activate = nft_hash_activate,
+ .deactivate = nft_hash_deactivate,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
+ .update = nft_hash_update,
.walk = nft_hash_walk,
- .features = NFT_SET_MAP,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 810385eb7249..db3b746858e3 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -24,12 +24,12 @@ struct nft_immediate_expr {
};
static void nft_immediate_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
- nft_data_copy(&data[priv->dreg], &priv->data);
+ nft_data_copy(&regs->data[priv->dreg], &priv->data, priv->dlen);
}
static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
@@ -49,17 +49,15 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
tb[NFTA_IMMEDIATE_DATA] == NULL)
return -EINVAL;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
+ err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc,
+ tb[NFTA_IMMEDIATE_DATA]);
if (err < 0)
return err;
priv->dlen = desc.len;
- err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
+ priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, &priv->data,
+ desc.type, desc.len);
if (err < 0)
goto err1;
@@ -81,7 +79,7 @@ static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_IMMEDIATE_DREG, priv->dreg))
goto nla_put_failure;
return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 85da5bd02f64..435c1ccd6c0e 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -27,7 +27,7 @@ struct nft_limit {
};
static void nft_limit_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_limit *priv = nft_expr_priv(expr);
@@ -45,7 +45,7 @@ static void nft_limit_eval(const struct nft_expr *expr,
}
spin_unlock_bh(&limit_lock);
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
@@ -98,6 +98,7 @@ static struct nft_expr_type nft_limit_type __read_mostly = {
.ops = &nft_limit_ops,
.policy = nft_limit_policy,
.maxattr = NFTA_LIMIT_MAX,
+ .flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index bde05f28cf14..a13d6a386d63 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -27,7 +27,7 @@ struct nft_log {
};
static void nft_log_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_log *priv = nft_expr_priv(expr);
@@ -78,7 +78,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
li->u.log.level =
ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
} else {
- li->u.log.level = 4;
+ li->u.log.level = LOGLEVEL_WARNING;
}
if (tb[NFTA_LOG_FLAGS] != NULL) {
li->u.log.logflags =
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 9615b8b9fb37..b3c31ef8015d 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -26,15 +26,20 @@ struct nft_lookup {
};
static void nft_lookup_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_lookup *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext;
- if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
+ if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
+ if (set->flags & NFT_SET_MAP)
+ nft_data_copy(&regs->data[priv->dreg],
+ nft_set_ext_data(ext), set->dlen);
return;
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ }
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
@@ -66,8 +71,11 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
return PTR_ERR(set);
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ if (set->flags & NFT_SET_EVAL)
+ return -EOPNOTSUPP;
+
+ priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
+ err = nft_validate_register_load(priv->sreg, set->klen);
if (err < 0)
return err;
@@ -75,19 +83,16 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG]));
- err = nft_validate_output_register(priv->dreg);
+ priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]);
+ err = nft_validate_register_store(ctx, priv->dreg, NULL,
+ set->dtype, set->dlen);
if (err < 0)
return err;
-
- if (priv->dreg == NFT_REG_VERDICT) {
- if (set->dtype != NFT_DATA_VERDICT)
- return -EINVAL;
- } else if (set->dtype == NFT_DATA_VERDICT)
- return -EINVAL;
} else if (set->flags & NFT_SET_MAP)
return -EINVAL;
+ priv->binding.flags = set->flags & NFT_SET_MAP;
+
err = nf_tables_bind_set(ctx, set, &priv->binding);
if (err < 0)
return err;
@@ -110,10 +115,10 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
goto nla_put_failure;
if (priv->set->flags & NFT_SET_MAP)
- if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
return 0;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e99911eda915..52561e1c31e2 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -25,65 +25,68 @@
#include <net/netfilter/nft_meta.h>
void nft_meta_get_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
const struct net_device *in = pkt->in, *out = pkt->out;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
switch (priv->key) {
case NFT_META_LEN:
- dest->data[0] = skb->len;
+ *dest = skb->len;
break;
case NFT_META_PROTOCOL:
- *(__be16 *)dest->data = skb->protocol;
+ *dest = 0;
+ *(__be16 *)dest = skb->protocol;
break;
case NFT_META_NFPROTO:
- dest->data[0] = pkt->ops->pf;
+ *dest = pkt->ops->pf;
break;
case NFT_META_L4PROTO:
- dest->data[0] = pkt->tprot;
+ *dest = pkt->tprot;
break;
case NFT_META_PRIORITY:
- dest->data[0] = skb->priority;
+ *dest = skb->priority;
break;
case NFT_META_MARK:
- dest->data[0] = skb->mark;
+ *dest = skb->mark;
break;
case NFT_META_IIF:
if (in == NULL)
goto err;
- dest->data[0] = in->ifindex;
+ *dest = in->ifindex;
break;
case NFT_META_OIF:
if (out == NULL)
goto err;
- dest->data[0] = out->ifindex;
+ *dest = out->ifindex;
break;
case NFT_META_IIFNAME:
if (in == NULL)
goto err;
- strncpy((char *)dest->data, in->name, sizeof(dest->data));
+ strncpy((char *)dest, in->name, IFNAMSIZ);
break;
case NFT_META_OIFNAME:
if (out == NULL)
goto err;
- strncpy((char *)dest->data, out->name, sizeof(dest->data));
+ strncpy((char *)dest, out->name, IFNAMSIZ);
break;
case NFT_META_IIFTYPE:
if (in == NULL)
goto err;
- *(u16 *)dest->data = in->type;
+ *dest = 0;
+ *(u16 *)dest = in->type;
break;
case NFT_META_OIFTYPE:
if (out == NULL)
goto err;
- *(u16 *)dest->data = out->type;
+ *dest = 0;
+ *(u16 *)dest = out->type;
break;
case NFT_META_SKUID:
- if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
goto err;
read_lock_bh(&skb->sk->sk_callback_lock);
@@ -93,13 +96,12 @@ void nft_meta_get_eval(const struct nft_expr *expr,
goto err;
}
- dest->data[0] =
- from_kuid_munged(&init_user_ns,
+ *dest = from_kuid_munged(&init_user_ns,
skb->sk->sk_socket->file->f_cred->fsuid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
case NFT_META_SKGID:
- if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
goto err;
read_lock_bh(&skb->sk->sk_callback_lock);
@@ -108,8 +110,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
read_unlock_bh(&skb->sk->sk_callback_lock);
goto err;
}
- dest->data[0] =
- from_kgid_munged(&init_user_ns,
+ *dest = from_kgid_munged(&init_user_ns,
skb->sk->sk_socket->file->f_cred->fsgid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
@@ -119,33 +120,33 @@ void nft_meta_get_eval(const struct nft_expr *expr,
if (dst == NULL)
goto err;
- dest->data[0] = dst->tclassid;
+ *dest = dst->tclassid;
break;
}
#endif
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
- dest->data[0] = skb->secmark;
+ *dest = skb->secmark;
break;
#endif
case NFT_META_PKTTYPE:
if (skb->pkt_type != PACKET_LOOPBACK) {
- dest->data[0] = skb->pkt_type;
+ *dest = skb->pkt_type;
break;
}
switch (pkt->ops->pf) {
case NFPROTO_IPV4:
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
- dest->data[0] = PACKET_MULTICAST;
+ *dest = PACKET_MULTICAST;
else
- dest->data[0] = PACKET_BROADCAST;
+ *dest = PACKET_BROADCAST;
break;
case NFPROTO_IPV6:
if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
- dest->data[0] = PACKET_MULTICAST;
+ *dest = PACKET_MULTICAST;
else
- dest->data[0] = PACKET_BROADCAST;
+ *dest = PACKET_BROADCAST;
break;
default:
WARN_ON(1);
@@ -153,23 +154,22 @@ void nft_meta_get_eval(const struct nft_expr *expr,
}
break;
case NFT_META_CPU:
- dest->data[0] = smp_processor_id();
+ *dest = raw_smp_processor_id();
break;
case NFT_META_IIFGROUP:
if (in == NULL)
goto err;
- dest->data[0] = in->group;
+ *dest = in->group;
break;
case NFT_META_OIFGROUP:
if (out == NULL)
goto err;
- dest->data[0] = out->group;
+ *dest = out->group;
break;
case NFT_META_CGROUP:
- if (skb->sk == NULL)
- break;
-
- dest->data[0] = skb->sk->sk_classid;
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ goto err;
+ *dest = skb->sk->sk_classid;
break;
default:
WARN_ON(1);
@@ -178,17 +178,17 @@ void nft_meta_get_eval(const struct nft_expr *expr,
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
void nft_meta_set_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *meta = nft_expr_priv(expr);
struct sk_buff *skb = pkt->skb;
- u32 value = data[meta->sreg].data[0];
+ u32 value = regs->data[meta->sreg];
switch (meta->key) {
case NFT_META_MARK:
@@ -218,22 +218,22 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
- int err;
+ unsigned int len;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
- case NFT_META_LEN:
case NFT_META_PROTOCOL:
+ case NFT_META_IIFTYPE:
+ case NFT_META_OIFTYPE:
+ len = sizeof(u16);
+ break;
case NFT_META_NFPROTO:
case NFT_META_L4PROTO:
+ case NFT_META_LEN:
case NFT_META_PRIORITY:
case NFT_META_MARK:
case NFT_META_IIF:
case NFT_META_OIF:
- case NFT_META_IIFNAME:
- case NFT_META_OIFNAME:
- case NFT_META_IIFTYPE:
- case NFT_META_OIFTYPE:
case NFT_META_SKUID:
case NFT_META_SKGID:
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -247,21 +247,19 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_IIFGROUP:
case NFT_META_OIFGROUP:
case NFT_META_CGROUP:
+ len = sizeof(u32);
+ break;
+ case NFT_META_IIFNAME:
+ case NFT_META_OIFNAME:
+ len = IFNAMSIZ;
break;
default:
return -EOPNOTSUPP;
}
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
-
- err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
- if (err < 0)
- return err;
-
- return 0;
+ priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
@@ -270,20 +268,24 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int len;
int err;
priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
switch (priv->key) {
case NFT_META_MARK:
case NFT_META_PRIORITY:
+ len = sizeof(u32);
+ break;
case NFT_META_NFTRACE:
+ len = sizeof(u8);
break;
default:
return -EOPNOTSUPP;
}
- priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
- err = nft_validate_input_register(priv->sreg);
+ priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
+ err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
return err;
@@ -298,7 +300,7 @@ int nft_meta_get_dump(struct sk_buff *skb,
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
+ if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg))
goto nla_put_failure;
return 0;
@@ -314,7 +316,7 @@ int nft_meta_set_dump(struct sk_buff *skb,
if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg)))
+ if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg))
goto nla_put_failure;
return 0;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index a0837c6c9283..ee2d71753746 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -37,7 +37,7 @@ struct nft_nat {
};
static void nft_nat_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_nat *priv = nft_expr_priv(expr);
@@ -49,33 +49,32 @@ static void nft_nat_eval(const struct nft_expr *expr,
if (priv->sreg_addr_min) {
if (priv->family == AF_INET) {
range.min_addr.ip = (__force __be32)
- data[priv->sreg_addr_min].data[0];
+ regs->data[priv->sreg_addr_min];
range.max_addr.ip = (__force __be32)
- data[priv->sreg_addr_max].data[0];
+ regs->data[priv->sreg_addr_max];
} else {
memcpy(range.min_addr.ip6,
- data[priv->sreg_addr_min].data,
- sizeof(struct nft_data));
+ &regs->data[priv->sreg_addr_min],
+ sizeof(range.min_addr.ip6));
memcpy(range.max_addr.ip6,
- data[priv->sreg_addr_max].data,
- sizeof(struct nft_data));
+ &regs->data[priv->sreg_addr_max],
+ sizeof(range.max_addr.ip6));
}
range.flags |= NF_NAT_RANGE_MAP_IPS;
}
if (priv->sreg_proto_min) {
range.min_proto.all =
- *(__be16 *)&data[priv->sreg_proto_min].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_min];
range.max_proto.all =
- *(__be16 *)&data[priv->sreg_proto_max].data[0];
+ *(__be16 *)&regs->data[priv->sreg_proto_max];
range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
range.flags |= priv->flags;
- data[NFT_REG_VERDICT].verdict =
- nf_nat_setup_info(ct, &range, priv->type);
+ regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type);
}
static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
@@ -119,6 +118,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_nat *priv = nft_expr_priv(expr);
+ unsigned int alen, plen;
u32 family;
int err;
@@ -146,25 +146,34 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
- if (family != AF_INET && family != AF_INET6)
- return -EAFNOSUPPORT;
if (family != ctx->afi->family)
return -EOPNOTSUPP;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip);
+ break;
+ case NFPROTO_IPV6:
+ alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6);
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
priv->sreg_addr_min =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN]));
-
- err = nft_validate_input_register(priv->sreg_addr_min);
+ nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]);
+ err = nft_validate_register_load(priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
priv->sreg_addr_max =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX]));
+ nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]);
- err = nft_validate_input_register(priv->sreg_addr_max);
+ err = nft_validate_register_load(priv->sreg_addr_max,
+ alen);
if (err < 0)
return err;
} else {
@@ -172,19 +181,21 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
}
}
+ plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
priv->sreg_proto_min =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN]));
+ nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
- err = nft_validate_input_register(priv->sreg_proto_min);
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
priv->sreg_proto_max =
- ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX]));
+ nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]);
- err = nft_validate_input_register(priv->sreg_proto_max);
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
@@ -220,18 +231,18 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (priv->sreg_addr_min) {
- if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN,
- htonl(priv->sreg_addr_min)) ||
- nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX,
- htonl(priv->sreg_addr_max)))
+ if (nft_dump_register(skb, NFTA_NAT_REG_ADDR_MIN,
+ priv->sreg_addr_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_ADDR_MAX,
+ priv->sreg_addr_max))
goto nla_put_failure;
}
if (priv->sreg_proto_min) {
- if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
- htonl(priv->sreg_proto_min)) ||
- nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
- htonl(priv->sreg_proto_max)))
+ if (nft_dump_register(skb, NFTA_NAT_REG_PROTO_MIN,
+ priv->sreg_proto_min) ||
+ nft_dump_register(skb, NFTA_NAT_REG_PROTO_MAX,
+ priv->sreg_proto_max))
goto nla_put_failure;
}
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 85daa84bfdfe..94fb3b27a2c5 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -18,12 +18,12 @@
#include <net/netfilter/nf_tables.h>
static void nft_payload_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- struct nft_data *dest = &data[priv->dreg];
+ u32 *dest = &regs->data[priv->dreg];
int offset;
switch (priv->base) {
@@ -43,11 +43,12 @@ static void nft_payload_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
- data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
@@ -62,24 +63,21 @@ static int nft_payload_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload *priv = nft_expr_priv(expr);
- int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]);
- priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
- err = nft_validate_output_register(priv->dreg);
- if (err < 0)
- return err;
- return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, priv->len);
}
static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_payload *priv = nft_expr_priv(expr);
- if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
+ if (nft_dump_register(skb, NFTA_PAYLOAD_DREG, priv->dreg) ||
nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
@@ -131,9 +129,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
}
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
- return ERR_PTR(-EINVAL);
+ len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index e8ae2f6bf232..96805d21d618 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -28,7 +28,7 @@ struct nft_queue {
};
static void nft_queue_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_queue *priv = nft_expr_priv(expr);
@@ -51,7 +51,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
- data[NFT_REG_VERDICT].verdict = ret;
+ regs->verdict.code = ret;
}
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index 46214f245665..1c30f41cff5b 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -26,25 +26,25 @@ struct nft_rbtree {
struct nft_rbtree_elem {
struct rb_node node;
- u16 flags;
- struct nft_data key;
- struct nft_data data[];
+ struct nft_set_ext ext;
};
-static bool nft_rbtree_lookup(const struct nft_set *set,
- const struct nft_data *key,
- struct nft_data *data)
+
+static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
+ const struct nft_set_ext **ext)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
- const struct rb_node *parent = priv->root.rb_node;
+ const struct rb_node *parent;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
spin_lock_bh(&nft_rbtree_lock);
+ parent = priv->root.rb_node;
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = parent->rb_left;
interval = rbe;
@@ -52,12 +52,17 @@ static bool nft_rbtree_lookup(const struct nft_set *set,
parent = parent->rb_right;
else {
found:
- if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ if (nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(&rbe->ext) &
+ NFT_SET_ELEM_INTERVAL_END)
goto out;
- if (set->flags & NFT_SET_MAP)
- nft_data_copy(data, rbe->data);
-
spin_unlock_bh(&nft_rbtree_lock);
+
+ *ext = &rbe->ext;
return true;
}
}
@@ -71,23 +76,13 @@ out:
return false;
}
-static void nft_rbtree_elem_destroy(const struct nft_set *set,
- struct nft_rbtree_elem *rbe)
-{
- nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_uninit(rbe->data, set->dtype);
-
- kfree(rbe);
-}
-
static int __nft_rbtree_insert(const struct nft_set *set,
struct nft_rbtree_elem *new)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
+ u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
int d;
parent = NULL;
@@ -95,13 +90,18 @@ static int __nft_rbtree_insert(const struct nft_set *set,
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, &new->key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext),
+ nft_set_ext_key(&new->ext),
+ set->klen);
if (d < 0)
p = &parent->rb_left;
else if (d > 0)
p = &parent->rb_right;
- else
- return -EEXIST;
+ else {
+ if (nft_set_elem_active(&rbe->ext, genmask))
+ return -EEXIST;
+ p = &parent->rb_left;
+ }
}
rb_link_node(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
@@ -111,31 +111,13 @@ static int __nft_rbtree_insert(const struct nft_set *set,
static int nft_rbtree_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_rbtree_elem *rbe;
- unsigned int size;
+ struct nft_rbtree_elem *rbe = elem->priv;
int err;
- size = sizeof(*rbe);
- if (set->flags & NFT_SET_MAP &&
- !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
- size += sizeof(rbe->data[0]);
-
- rbe = kzalloc(size, GFP_KERNEL);
- if (rbe == NULL)
- return -ENOMEM;
-
- rbe->flags = elem->flags;
- nft_data_copy(&rbe->key, &elem->key);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(rbe->data, &elem->data);
-
spin_lock_bh(&nft_rbtree_lock);
err = __nft_rbtree_insert(set, rbe);
- if (err < 0)
- kfree(rbe);
-
spin_unlock_bh(&nft_rbtree_lock);
+
return err;
}
@@ -143,42 +125,49 @@ static void nft_rbtree_remove(const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree *priv = nft_set_priv(set);
- struct nft_rbtree_elem *rbe = elem->cookie;
+ struct nft_rbtree_elem *rbe = elem->priv;
spin_lock_bh(&nft_rbtree_lock);
rb_erase(&rbe->node, &priv->root);
spin_unlock_bh(&nft_rbtree_lock);
- kfree(rbe);
}
-static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
+static void nft_rbtree_activate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
+{
+ struct nft_rbtree_elem *rbe = elem->priv;
+
+ nft_set_elem_change_active(set, &rbe->ext);
+}
+
+static void *nft_rbtree_deactivate(const struct nft_set *set,
+ const struct nft_set_elem *elem)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
int d;
- spin_lock_bh(&nft_rbtree_lock);
while (parent != NULL) {
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
+ set->klen);
if (d < 0)
parent = parent->rb_left;
else if (d > 0)
parent = parent->rb_right;
else {
- elem->cookie = rbe;
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(&elem->data, rbe->data);
- elem->flags = rbe->flags;
- spin_unlock_bh(&nft_rbtree_lock);
- return 0;
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
+ parent = parent->rb_left;
+ continue;
+ }
+ nft_set_elem_change_active(set, &rbe->ext);
+ return rbe;
}
}
- spin_unlock_bh(&nft_rbtree_lock);
- return -ENOENT;
+ return NULL;
}
static void nft_rbtree_walk(const struct nft_ctx *ctx,
@@ -186,21 +175,21 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
struct nft_set_iter *iter)
{
const struct nft_rbtree *priv = nft_set_priv(set);
- const struct nft_rbtree_elem *rbe;
+ struct nft_rbtree_elem *rbe;
struct nft_set_elem elem;
struct rb_node *node;
+ u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
spin_lock_bh(&nft_rbtree_lock);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
+
if (iter->count < iter->skip)
goto cont;
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ goto cont;
- rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_data_copy(&elem.key, &rbe->key);
- if (set->flags & NFT_SET_MAP &&
- !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
- nft_data_copy(&elem.data, rbe->data);
- elem.flags = rbe->flags;
+ elem.priv = rbe;
iter->err = iter->fn(ctx, set, iter, &elem);
if (iter->err < 0) {
@@ -237,7 +226,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_rbtree_elem_destroy(set, rbe);
+ nft_set_elem_destroy(set, rbe);
}
}
@@ -247,9 +236,6 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
unsigned int nsize;
nsize = sizeof(struct nft_rbtree_elem);
- if (features & NFT_SET_MAP)
- nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
-
if (desc->size)
est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
else
@@ -262,12 +248,14 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
static struct nft_set_ops nft_rbtree_ops __read_mostly = {
.privsize = nft_rbtree_privsize,
+ .elemsize = offsetof(struct nft_rbtree_elem, ext),
.estimate = nft_rbtree_estimate,
.init = nft_rbtree_init,
.destroy = nft_rbtree_destroy,
.insert = nft_rbtree_insert,
.remove = nft_rbtree_remove,
- .get = nft_rbtree_get,
+ .deactivate = nft_rbtree_deactivate,
+ .activate = nft_rbtree_activate,
.lookup = nft_rbtree_lookup,
.walk = nft_rbtree_walk,
.features = NFT_SET_INTERVAL | NFT_SET_MAP,
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index d7e9e93a4e90..03f7bf40ae75 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -44,25 +44,28 @@ int nft_redir_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_redir *priv = nft_expr_priv(expr);
+ unsigned int plen;
int err;
err = nft_redir_validate(ctx, expr, NULL);
if (err < 0)
return err;
+ plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
- ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN]));
+ nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
- err = nft_validate_input_register(priv->sreg_proto_min);
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
priv->sreg_proto_max =
- ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX]));
+ nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]);
- err = nft_validate_input_register(priv->sreg_proto_max);
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
@@ -85,11 +88,11 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
const struct nft_redir *priv = nft_expr_priv(expr);
if (priv->sreg_proto_min) {
- if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN,
- htonl(priv->sreg_proto_min)))
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN,
+ priv->sreg_proto_min))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX,
- htonl(priv->sreg_proto_max)))
+ if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX,
+ priv->sreg_proto_max))
goto nla_put_failure;
}
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 7b5f9d58680a..62cabee42fbe 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -18,7 +18,7 @@
#include <net/netfilter/ipv6/nf_reject.h>
static void nft_reject_inet_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
+ struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
@@ -28,14 +28,16 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
case NFPROTO_IPV4:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code);
+ nf_send_unreach(pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
break;
case NFT_REJECT_TCP_RST:
nf_send_reset(pkt->skb, pkt->ops->hooknum);
break;
case NFT_REJECT_ICMPX_UNREACH:
nf_send_unreach(pkt->skb,
- nft_reject_icmp_code(priv->icmp_code));
+ nft_reject_icmp_code(priv->icmp_code),
+ pkt->ops->hooknum);
break;
}
break;
@@ -56,7 +58,8 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
}
break;
}
- data[NFT_REG_VERDICT].verdict = NF_DROP;
+
+ regs->verdict.code = NF_DROP;
}
static int nft_reject_inet_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 50e1e5aaf4ce..cca96cec1b68 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -42,15 +42,21 @@ enum nf_tproxy_lookup_t {
static bool tproxy_sk_is_transparent(struct sock *sk)
{
- if (sk->sk_state != TCP_TIME_WAIT) {
- if (inet_sk(sk)->transparent)
- return true;
- sock_put(sk);
- } else {
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
if (inet_twsk(sk)->tw_transparent)
return true;
- inet_twsk_put(inet_twsk(sk));
+ break;
+ case TCP_NEW_SYN_RECV:
+ if (inet_rsk(inet_reqsk(sk))->no_srccheck)
+ return true;
+ break;
+ default:
+ if (inet_sk(sk)->transparent)
+ return true;
}
+
+ sock_gen_put(sk);
return false;
}
@@ -266,7 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
@@ -431,7 +437,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7198d660b4de..a1d126f29463 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -39,7 +39,7 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_cgroup_info *info = par->matchinfo;
- if (skb->sk == NULL)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
return (info->id == skb->sk->sk_classid) ^ info->invert;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index f440f57a452f..1caaccbc306c 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -25,16 +25,15 @@ MODULE_ALIAS("ip6t_physdev");
static bool
physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct xt_physdev_info *info = par->matchinfo;
+ const struct net_device *physdev;
unsigned long ret;
const char *indev, *outdev;
- const struct nf_bridge_info *nf_bridge;
/* Not a bridged IP packet or no info available yet:
* LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
* the destination device will be a bridge. */
- if (!(nf_bridge = skb->nf_bridge)) {
+ if (!skb->nf_bridge) {
/* Return MATCH if the invert flags of the used options are on */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
!(info->invert & XT_PHYSDEV_OP_BRIDGED))
@@ -54,31 +53,41 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
return true;
}
+ physdev = nf_bridge_get_physoutdev(skb);
+ outdev = physdev ? physdev->name : NULL;
+
/* This only makes sense in the FORWARD and POSTROUTING chains */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
- (!!(nf_bridge->mask & BRNF_BRIDGED) ^
- !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+ (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
return false;
+ physdev = nf_bridge_get_physindev(skb);
+ indev = physdev ? physdev->name : NULL;
+
if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
- (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+ (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
(info->bitmask & XT_PHYSDEV_OP_ISOUT &&
- (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
+ (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
return false;
if (!(info->bitmask & XT_PHYSDEV_OP_IN))
goto match_outdev;
- indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
- ret = ifname_compare_aligned(indev, info->physindev, info->in_mask);
- if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
- return false;
+ if (indev) {
+ ret = ifname_compare_aligned(indev, info->physindev,
+ info->in_mask);
+
+ if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+ return false;
+ }
match_outdev:
if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
return true;
- outdev = nf_bridge->physoutdev ?
- nf_bridge->physoutdev->name : nulldevname;
+
+ if (!outdev)
+ return false;
+
ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 0d47afea9682..89045982ec94 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -193,7 +193,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter0(opt.ext.packets, &info->packets))
- return 0;
+ return false;
return match_counter0(opt.ext.bytes, &info->bytes);
}
@@ -239,7 +239,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
return ret;
if (!match_counter(opt.ext.packets, &info->packets))
- return 0;
+ return false;
return match_counter(opt.ext.bytes, &info->bytes);
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 13332dbf291d..e092cb046326 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -129,13 +129,24 @@ xt_socket_get_sock_v4(struct net *net, const u8 protocol,
return NULL;
}
-static bool
-socket_match(const struct sk_buff *skb, struct xt_action_param *par,
- const struct xt_socket_mtinfo1 *info)
+static bool xt_socket_sk_is_transparent(struct sock *sk)
+{
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
+ return inet_twsk(sk)->tw_transparent;
+
+ case TCP_NEW_SYN_RECV:
+ return inet_rsk(inet_reqsk(sk))->no_srccheck;
+
+ default:
+ return inet_sk(sk)->transparent;
+ }
+}
+
+static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
+ const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
u8 uninitialized_var(protocol);
@@ -145,10 +156,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
#endif
if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
protocol = iph->protocol;
saddr = iph->saddr;
@@ -158,16 +171,17 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
- &sport, &dport))
- return false;
+ &sport, &dport))
+ return NULL;
} else {
- return false;
+ return NULL;
}
#ifdef XT_SOCKET_HAVE_CONNTRACK
- /* Do the lookup with the original socket address in case this is a
- * reply packet of an established SNAT-ted connection. */
-
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct) &&
((iph->protocol != IPPROTO_ICMP &&
@@ -183,10 +197,18 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
}
#endif
+ return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ const struct xt_socket_mtinfo1 *info)
+{
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v4(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -195,16 +217,14 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* unless XT_SOCKET_NOWILDCARD is set
*/
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
- sk->sk_state != TCP_TIME_WAIT &&
+ sk_fullsock(sk) &&
inet_sk(sk)->inet_rcv_saddr == 0);
/* Ignore non-transparent sockets,
- if XT_SOCKET_TRANSPARENT is used */
+ * if XT_SOCKET_TRANSPARENT is used
+ */
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = ((sk->sk_state != TCP_TIME_WAIT &&
- inet_sk(sk)->transparent) ||
- (sk->sk_state == TCP_TIME_WAIT &&
- inet_twsk(sk)->tw_transparent));
+ transparent = xt_socket_sk_is_transparent(sk);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -213,12 +233,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
sk = NULL;
}
- pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
- protocol, &saddr, ntohs(sport),
- &daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
static bool
@@ -315,28 +330,26 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
return NULL;
}
-static bool
-socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
+ const struct net_device *indev)
{
- struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
- const struct in6_addr *daddr = NULL, *saddr = NULL;
__be16 uninitialized_var(dport), uninitialized_var(sport);
- int thoff = 0, uninitialized_var(tproto);
- const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ const struct in6_addr *daddr = NULL, *saddr = NULL;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
pr_debug("unable to find transport header in IPv6 packet, dropping\n");
- return NF_DROP;
+ return NULL;
}
if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
- hp = skb_header_pointer(skb, thoff,
- sizeof(_hdr), &_hdr);
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
saddr = &iph->saddr;
sport = hp->source;
@@ -344,17 +357,27 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
dport = hp->dest;
} else if (tproto == IPPROTO_ICMPV6) {
+ struct ipv6hdr ipv6_var;
+
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport, &ipv6_var))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
+ return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v6(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -363,16 +386,14 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
* unless XT_SOCKET_NOWILDCARD is set
*/
wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
- sk->sk_state != TCP_TIME_WAIT &&
+ sk_fullsock(sk) &&
ipv6_addr_any(&sk->sk_v6_rcv_saddr));
/* Ignore non-transparent sockets,
- if XT_SOCKET_TRANSPARENT is used */
+ * if XT_SOCKET_TRANSPARENT is used
+ */
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = ((sk->sk_state != TCP_TIME_WAIT &&
- inet_sk(sk)->transparent) ||
- (sk->sk_state == TCP_TIME_WAIT &&
- inet_twsk(sk)->tw_transparent));
+ transparent = xt_socket_sk_is_transparent(sk);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -381,13 +402,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
sk = NULL;
}
- pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
- "(orig %pI6:%hu) sock %p\n",
- tproto, saddr, ntohs(sport),
- daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
#endif
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 5699adb97652..0bc3460319c8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -26,13 +26,12 @@ static bool
string_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_string_info *conf = par->matchinfo;
- struct ts_state state;
bool invert;
invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
- conf->to_offset, conf->config, &state)
+ conf->to_offset, conf->config)
!= UINT_MAX) ^ invert;
}