aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-10 19:06:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-10 19:06:09 -0800
commit8efd0d9c316af470377894a6a0f9ff63ce18c177 (patch)
tree65d00bf8c7fd8f938a42d38e44bad11d4cf08664 /net/netfilter
parentMerge tag 'media/v5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net (diff)
downloadlinux-dev-8efd0d9c316af470377894a6a0f9ff63ce18c177.tar.xz
linux-dev-8efd0d9c316af470377894a6a0f9ff63ce18c177.zip
Merge tag '5.17-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core ---- - Defer freeing TCP skbs to the BH handler, whenever possible, or at least perform the freeing outside of the socket lock section to decrease cross-CPU allocator work and improve latency. - Add netdevice refcount tracking to locate sources of netdevice and net namespace refcount leaks. - Make Tx watchdog less intrusive - avoid pausing Tx and restarting all queues from a single CPU removing latency spikes. - Various small optimizations throughout the stack from Eric Dumazet. - Make netdev->dev_addr[] constant, force modifications to go via appropriate helpers to allow us to keep addresses in ordered data structures. - Replace unix_table_lock with per-hash locks, improving performance of bind() calls. - Extend skb drop tracepoint with a drop reason. - Allow SO_MARK and SO_PRIORITY setsockopt under CAP_NET_RAW. BPF --- - New helpers: - bpf_find_vma(), find and inspect VMAs for profiling use cases - bpf_loop(), runtime-bounded loop helper trading some execution time for much faster (if at all converging) verification - bpf_strncmp(), improve performance, avoid compiler flakiness - bpf_get_func_arg(), bpf_get_func_ret(), bpf_get_func_arg_cnt() for tracing programs, all inlined by the verifier - Support BPF relocations (CO-RE) in the kernel loader. - Further the support for BTF_TYPE_TAG annotations. - Allow access to local storage in sleepable helpers. - Convert verifier argument types to a composable form with different attributes which can be shared across types (ro, maybe-null). - Prepare libbpf for upcoming v1.0 release by cleaning up APIs, creating new, extensible ones where missing and deprecating those to be removed. Protocols --------- - WiFi (mac80211/cfg80211): - notify user space about long "come back in N" AP responses, allow it to react to such temporary rejections - allow non-standard VHT MCS 10/11 rates - use coarse time in airtime fairness code to save CPU cycles - Bluetooth: - rework of HCI command execution serialization to use a common queue and work struct, and improve handling errors reported in the middle of a batch of commands - rework HCI event handling to use skb_pull_data, avoiding packet parsing pitfalls - support AOSP Bluetooth Quality Report - SMC: - support net namespaces, following the RDMA model - improve connection establishment latency by pre-clearing buffers - introduce TCP ULP for automatic redirection to SMC - Multi-Path TCP: - support ioctls: SIOCINQ, OUTQ, and OUTQNSD - support socket options: IP_TOS, IP_FREEBIND, IP_TRANSPARENT, IPV6_FREEBIND, and IPV6_TRANSPARENT, TCP_CORK and TCP_NODELAY - support cmsgs: TCP_INQ - improvements in the data scheduler (assigning data to subflows) - support fastclose option (quick shutdown of the full MPTCP connection, similar to TCP RST in regular TCP) - MCTP (Management Component Transport) over serial, as defined by DMTF spec DSP0253 - "MCTP Serial Transport Binding". Driver API ---------- - Support timestamping on bond interfaces in active/passive mode. - Introduce generic phylink link mode validation for drivers which don't have any quirks and where MAC capability bits fully express what's supported. Allow PCS layer to participate in the validation. Convert a number of drivers. - Add support to set/get size of buffers on the Rx rings and size of the tx copybreak buffer via ethtool. - Support offloading TC actions as first-class citizens rather than only as attributes of filters, improve sharing and device resource utilization. - WiFi (mac80211/cfg80211): - support forwarding offload (ndo_fill_forward_path) - support for background radar detection hardware - SA Query Procedures offload on the AP side New hardware / drivers ---------------------- - tsnep - FPGA based TSN endpoint Ethernet MAC used in PLCs with real-time requirements for isochronous communication with protocols like OPC UA Pub/Sub. - Qualcomm BAM-DMUX WWAN - driver for data channels of modems integrated into many older Qualcomm SoCs, e.g. MSM8916 or MSM8974 (qcom_bam_dmux). - Microchip LAN966x multi-port Gigabit AVB/TSN Ethernet Switch driver with support for bridging, VLANs and multicast forwarding (lan966x). - iwlmei driver for co-operating between Intel's WiFi driver and Intel's Active Management Technology (AMT) devices. - mse102x - Vertexcom MSE102x Homeplug GreenPHY chips - Bluetooth: - MediaTek MT7921 SDIO devices - Foxconn MT7922A - Realtek RTL8852AE Drivers ------- - Significantly improve performance in the datapaths of: lan78xx, ax88179_178a, lantiq_xrx200, bnxt. - Intel Ethernet NICs: - igb: support PTP/time PEROUT and EXTTS SDP functions on 82580/i354/i350 adapters - ixgbevf: new PF -> VF mailbox API which avoids the risk of mailbox corruption with ESXi - iavf: support configuration of VLAN features of finer granularity, stacked tags and filtering - ice: PTP support for new E822 devices with sub-ns precision - ice: support firmware activation without reboot - Mellanox Ethernet NICs (mlx5): - expose control over IRQ coalescing mode (CQE vs EQE) via ethtool - support TC forwarding when tunnel encap and decap happen between two ports of the same NIC - dynamically size and allow disabling various features to save resources for running in embedded / SmartNIC scenarios - Broadcom Ethernet NICs (bnxt): - use page frag allocator to improve Rx performance - expose control over IRQ coalescing mode (CQE vs EQE) via ethtool - Other Ethernet NICs: - amd-xgbe: add Ryzen 6000 (Yellow Carp) Ethernet support - Microsoft cloud/virtual NIC (mana): - add XDP support (PASS, DROP, TX) - Mellanox Ethernet switches (mlxsw): - initial support for Spectrum-4 ASICs - VxLAN with IPv6 underlay - Marvell Ethernet switches (prestera): - support flower flow templates - add basic IP forwarding support - NXP embedded Ethernet switches (ocelot & felix): - support Per-Stream Filtering and Policing (PSFP) - enable cut-through forwarding between ports by default - support FDMA to improve packet Rx/Tx to CPU - Other embedded switches: - hellcreek: improve trapping management (STP and PTP) packets - qca8k: support link aggregation and port mirroring - Qualcomm 802.11ax WiFi (ath11k): - qca6390, wcn6855: enable 802.11 power save mode in station mode - BSS color change support - WCN6855 hw2.1 support - 11d scan offload support - scan MAC address randomization support - full monitor mode, only supported on QCN9074 - qca6390/wcn6855: report signal and tx bitrate - qca6390: rfkill support - qca6390/wcn6855: regdb.bin support - Intel WiFi (iwlwifi): - support SAR GEO Offset Mapping (SGOM) and Time-Aware-SAR (TAS) in cooperation with the BIOS - support for Optimized Connectivity Experience (OCE) scan - support firmware API version 68 - lots of preparatory work for the upcoming Bz device family - MediaTek WiFi (mt76): - Specific Absorption Rate (SAR) support - mt7921: 160 MHz channel support - RealTek WiFi (rtw88): - Specific Absorption Rate (SAR) support - scan offload - Other WiFi NICs - ath10k: support fetching (pre-)calibration data from nvmem - brcmfmac: configure keep-alive packet on suspend - wcn36xx: beacon filter support" * tag '5.17-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2048 commits) tcp: tcp_send_challenge_ack delete useless param `skb` net/qla3xxx: Remove useless DMA-32 fallback configuration rocker: Remove useless DMA-32 fallback configuration hinic: Remove useless DMA-32 fallback configuration lan743x: Remove useless DMA-32 fallback configuration net: enetc: Remove useless DMA-32 fallback configuration cxgb4vf: Remove useless DMA-32 fallback configuration cxgb4: Remove useless DMA-32 fallback configuration cxgb3: Remove useless DMA-32 fallback configuration bnx2x: Remove useless DMA-32 fallback configuration et131x: Remove useless DMA-32 fallback configuration be2net: Remove useless DMA-32 fallback configuration vmxnet3: Remove useless DMA-32 fallback configuration bna: Simplify DMA setting net: alteon: Simplify DMA setting myri10ge: Simplify DMA setting qlcnic: Simplify DMA setting net: allwinner: Fix print format page_pool: remove spinlock in page_pool_refill_alloc_cache() amt: fix wrong return type of amt_send_membership_update() ...
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/Makefile3
-rw-r--r--net/netfilter/core.c29
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c7
-rw-r--r--net/netfilter/nf_conntrack_core.c68
-rw-r--r--net/netfilter/nf_conntrack_expect.c6
-rw-r--r--net/netfilter/nf_conntrack_netlink.c14
-rw-r--r--net/netfilter/nf_conntrack_standalone.c4
-rw-r--r--net/netfilter/nf_flow_table_core.c2
-rw-r--r--net/netfilter/nf_flow_table_inet.c26
-rw-r--r--net/netfilter/nf_nat_core.c47
-rw-r--r--net/netfilter/nf_nat_masquerade.c4
-rw-r--r--net/netfilter/nf_synproxy_core.c1
-rw-r--r--net/netfilter/nf_tables_api.c160
-rw-r--r--net/netfilter/nf_tables_core.c87
-rw-r--r--net/netfilter/nf_tables_trace.c2
-rw-r--r--net/netfilter/nfnetlink_hook.c1
-rw-r--r--net/netfilter/nfnetlink_log.c5
-rw-r--r--net/netfilter/nfnetlink_queue.c14
-rw-r--r--net/netfilter/nft_bitwise.c95
-rw-r--r--net/netfilter/nft_connlimit.c26
-rw-r--r--net/netfilter/nft_counter.c58
-rw-r--r--net/netfilter/nft_ct.c4
-rw-r--r--net/netfilter/nft_fwd_netdev.c7
-rw-r--r--net/netfilter/nft_last.c69
-rw-r--r--net/netfilter/nft_limit.c172
-rw-r--r--net/netfilter/nft_meta.c48
-rw-r--r--net/netfilter/nft_numgen.c34
-rw-r--r--net/netfilter/nft_payload.c60
-rw-r--r--net/netfilter/nft_quota.c52
-rw-r--r--net/netfilter/nft_reject_netdev.c1
-rw-r--r--net/netfilter/nft_set_pipapo.c8
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c4
-rw-r--r--net/netfilter/xt_CT.c3
34 files changed, 818 insertions, 309 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 3646fc195e7d..ddc54b6d18ee 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -515,12 +515,6 @@ config NFT_FLOW_OFFLOAD
This option adds the "flow_offload" expression that you can use to
choose what flows are placed into the hardware.
-config NFT_COUNTER
- tristate "Netfilter nf_tables counter module"
- help
- This option adds the "counter" expression that you can use to
- include packet and byte counters in a rule.
-
config NFT_CONNLIMIT
tristate "Netfilter nf_tables connlimit module"
depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index aab20e575ecd..a135b1a46014 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -75,7 +75,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nf_tables_trace.o nft_immediate.o nft_cmp.o nft_range.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o nft_last.o \
- nft_chain_route.o nf_tables_offload.o \
+ nft_counter.o nft_chain_route.o nf_tables_offload.o \
nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o \
nft_set_pipapo.o
@@ -100,7 +100,6 @@ obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_REJECT_NETDEV) += nft_reject_netdev.o
obj-$(CONFIG_NFT_TUNNEL) += nft_tunnel.o
-obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
obj-$(CONFIG_NFT_REDIR) += nft_redir.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 6dec9cd395f1..354cb472f386 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -666,32 +666,29 @@ EXPORT_SYMBOL(nf_hook_slow_list);
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
-struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
+const struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);
-struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
+const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-/* This does not belong here, but locally generated errors need it if connection
- tracking in use: without this, connection may not be in hash table, and hence
- manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
- __rcu __read_mostly;
-EXPORT_SYMBOL(ip_ct_attach);
-
-struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
+const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_hook);
+/* This does not belong here, but locally generated errors need it if connection
+ * tracking in use: without this, connection may not be in hash table, and hence
+ * manufactured ICMP or RST packets will not be associated with it.
+ */
void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
{
- void (*attach)(struct sk_buff *, const struct sk_buff *);
+ const struct nf_ct_hook *ct_hook;
if (skb->_nfct) {
rcu_read_lock();
- attach = rcu_dereference(ip_ct_attach);
- if (attach)
- attach(new, skb);
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook)
+ ct_hook->attach(new, skb);
rcu_read_unlock();
}
}
@@ -699,7 +696,7 @@ EXPORT_SYMBOL(nf_ct_attach);
void nf_conntrack_destroy(struct nf_conntrack *nfct)
{
- struct nf_ct_hook *ct_hook;
+ const struct nf_ct_hook *ct_hook;
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
@@ -712,7 +709,7 @@ EXPORT_SYMBOL(nf_conntrack_destroy);
bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
const struct sk_buff *skb)
{
- struct nf_ct_hook *ct_hook;
+ const struct nf_ct_hook *ct_hook;
bool ret = false;
rcu_read_lock();
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 39c523bd775c..7f645328b47f 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -960,8 +960,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Create a destination for the given service
*/
static int
-ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
- struct ip_vs_dest **dest_p)
+ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
unsigned int atype, i;
@@ -1021,8 +1020,6 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
spin_lock_init(&dest->stats.lock);
__ip_vs_update_dest(svc, dest, udest, 1);
- *dest_p = dest;
-
LeaveFunction(2);
return 0;
@@ -1096,7 +1093,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Allocate and initialize the dest structure
*/
- ret = ip_vs_new_dest(svc, udest, &dest);
+ ret = ip_vs_new_dest(svc, udest);
}
LeaveFunction(2);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4712a90a1820..894a325d39f2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -47,6 +47,7 @@
#include <net/netfilter/nf_conntrack_timeout.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
@@ -189,7 +190,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
seqcount_spinlock_t nf_conntrack_generation __read_mostly;
-static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
+static siphash_aligned_key_t nf_conntrack_hash_rnd;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
unsigned int zoneid,
@@ -482,7 +483,7 @@ EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
*/
u32 nf_ct_get_id(const struct nf_conn *ct)
{
- static __read_mostly siphash_key_t ct_id_seed;
+ static siphash_aligned_key_t ct_id_seed;
unsigned long a, b, c, d;
net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
@@ -558,7 +559,7 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)
-/* Released via destroy_conntrack() */
+/* Released via nf_ct_destroy() */
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
const struct nf_conntrack_zone *zone,
gfp_t flags)
@@ -585,7 +586,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
nf_ct_zone_add(tmpl, zone);
- atomic_set(&tmpl->ct_general.use, 0);
+ refcount_set(&tmpl->ct_general.use, 1);
return tmpl;
}
@@ -612,13 +613,12 @@ static void destroy_gre_conntrack(struct nf_conn *ct)
#endif
}
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
+void nf_ct_destroy(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- pr_debug("destroy_conntrack(%p)\n", ct);
- WARN_ON(atomic_read(&nfct->use) != 0);
+ pr_debug("%s(%p)\n", __func__, ct);
+ WARN_ON(refcount_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
@@ -643,9 +643,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (ct->master)
nf_ct_put(ct->master);
- pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
+ pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
nf_conntrack_free(ct);
}
+EXPORT_SYMBOL(nf_ct_destroy);
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
@@ -742,7 +743,7 @@ nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
/* caller must hold rcu readlock and none of the nf_conntrack_locks */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
- if (!atomic_inc_not_zero(&ct->ct_general.use))
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
return;
if (nf_ct_should_gc(ct))
@@ -810,7 +811,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
* in, try to obtain a reference and re-check tuple
*/
ct = nf_ct_tuplehash_to_ctrack(h);
- if (likely(atomic_inc_not_zero(&ct->ct_general.use))) {
+ if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
goto found;
@@ -907,7 +908,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
smp_wmb();
/* The caller holds a reference to this object */
- atomic_set(&ct->ct_general.use, 2);
+ refcount_set(&ct->ct_general.use, 2);
__nf_conntrack_hash_insert(ct, hash, reply_hash);
nf_conntrack_double_unlock(hash, reply_hash);
NF_CT_STAT_INC(net, insert);
@@ -958,7 +959,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct)
{
struct nf_conn_tstamp *tstamp;
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
ct->status |= IPS_CONFIRMED;
/* set conntrack timestamp, if enabled. */
@@ -989,7 +990,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb,
nf_ct_acct_merge(ct, ctinfo, loser_ct);
nf_ct_add_to_dying_list(loser_ct);
- nf_conntrack_put(&loser_ct->ct_general);
+ nf_ct_put(loser_ct);
nf_ct_set(skb, ct, ctinfo);
NF_CT_STAT_INC(net, clash_resolve);
@@ -1351,7 +1352,7 @@ static unsigned int early_drop_list(struct net *net,
nf_ct_is_dying(tmp))
continue;
- if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
/* kill only if still in same netns -- might have moved due to
@@ -1469,7 +1470,7 @@ static void gc_worker(struct work_struct *work)
continue;
/* need to take reference to avoid possible races */
- if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
if (gc_worker_skip_ct(tmp)) {
@@ -1562,16 +1563,14 @@ __nf_conntrack_alloc(struct net *net,
ct->status = 0;
WRITE_ONCE(ct->timeout, 0);
write_pnet(&ct->ct_net, net);
- memset(&ct->__nfct_init_offset, 0,
- offsetof(struct nf_conn, proto) -
- offsetof(struct nf_conn, __nfct_init_offset));
+ memset_after(ct, 0, __nfct_init_offset);
nf_ct_zone_add(ct, zone);
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
- atomic_set(&ct->ct_general.use, 0);
+ refcount_set(&ct->ct_general.use, 0);
return ct;
out:
atomic_dec(&cnet->count);
@@ -1596,7 +1595,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- WARN_ON(atomic_read(&ct->ct_general.use) != 0);
+ WARN_ON(refcount_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
@@ -1688,8 +1687,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
- /* Now it is inserted into the unconfirmed list, bump refcount */
- nf_conntrack_get(&ct->ct_general);
+ /* Now it is inserted into the unconfirmed list, set refcount to 1. */
+ refcount_set(&ct->ct_general.use, 1);
nf_ct_add_to_unconfirmed_list(ct);
local_bh_enable();
@@ -1749,6 +1748,9 @@ resolve_normal_ct(struct nf_conn *tmpl,
return 0;
if (IS_ERR(h))
return PTR_ERR(h);
+
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ ct->local_origin = state->hook == NF_INET_LOCAL_OUT;
}
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1920,7 +1922,7 @@ repeat:
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
- nf_conntrack_put(&ct->ct_general);
+ nf_ct_put(ct);
skb->_nfct = 0;
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
if (ret == -NF_DROP)
@@ -2084,9 +2086,9 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
+ const struct nf_nat_hook *nat_hook;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
- struct nf_nat_hook *nat_hook;
unsigned int status;
int dataoff;
u16 l3num;
@@ -2299,7 +2301,7 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
return NULL;
found:
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
spin_unlock(lockp);
local_bh_enable();
return ct;
@@ -2454,7 +2456,6 @@ static int kill_all(struct nf_conn *i, void *data)
void nf_conntrack_cleanup_start(void)
{
conntrack_gc_work.exiting = true;
- RCU_INIT_POINTER(ip_ct_attach, NULL);
}
void nf_conntrack_cleanup_end(void)
@@ -2590,7 +2591,6 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
- old_size = nf_conntrack_htable_size;
old_hash = nf_conntrack_hash;
nf_conntrack_hash = hash;
@@ -2629,7 +2629,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
static __always_inline unsigned int total_extension_size(void)
{
/* remember to add new extensions below */
- BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
+ BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
return sizeof(struct nf_ct_ext) +
sizeof(struct nf_conn_help)
@@ -2653,6 +2653,9 @@ static __always_inline unsigned int total_extension_size(void)
#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ sizeof(struct nf_conn_synproxy)
#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+ + sizeof(struct nf_conn_act_ct_ext)
+#endif
;
};
@@ -2770,16 +2773,15 @@ err_cachep:
return ret;
}
-static struct nf_ct_hook nf_conntrack_hook = {
+static const struct nf_ct_hook nf_conntrack_hook = {
.update = nf_conntrack_update,
- .destroy = destroy_conntrack,
+ .destroy = nf_ct_destroy,
.get_tuple_skb = nf_conntrack_get_tuple_skb,
+ .attach = nf_conntrack_attach,
};
void nf_conntrack_init_end(void)
{
- /* For use by REJECT target */
- RCU_INIT_POINTER(ip_ct_attach, nf_conntrack_attach);
RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook);
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index f562eeef4234..96948e98ec53 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static siphash_key_t nf_ct_expect_hashrnd __read_mostly;
+static siphash_aligned_key_t nf_ct_expect_hashrnd;
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
@@ -203,12 +203,12 @@ nf_ct_find_expectation(struct net *net,
* about to invoke ->destroy(), or nf_ct_delete() via timeout
* or early_drop().
*
- * The atomic_inc_not_zero() check tells: If that fails, we
+ * The refcount_inc_not_zero() check tells: If that fails, we
* know that the ct is being destroyed. If it succeeds, we
* can be sure the ct cannot disappear underneath.
*/
if (unlikely(nf_ct_is_dying(exp->master) ||
- !atomic_inc_not_zero(&exp->master->ct_general.use)))
+ !refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
if (exp->flags & NF_CT_EXPECT_PERMANENT) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index ec4164c32d27..ac438370f94a 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -508,7 +508,7 @@ nla_put_failure:
static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
{
- if (nla_put_be32(skb, CTA_USE, htonl(atomic_read(&ct->ct_general.use))))
+ if (nla_put_be32(skb, CTA_USE, htonl(refcount_read(&ct->ct_general.use))))
goto nla_put_failure;
return 0;
@@ -1198,7 +1198,7 @@ restart:
ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_is_expired(ct)) {
if (i < ARRAY_SIZE(nf_ct_evict) &&
- atomic_inc_not_zero(&ct->ct_general.use))
+ refcount_inc_not_zero(&ct->ct_general.use))
nf_ct_evict[i++] = ct;
continue;
}
@@ -1747,9 +1747,9 @@ restart:
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
- ct, dying ? true : false, 0);
+ ct, dying, 0);
if (res < 0) {
- if (!atomic_inc_not_zero(&ct->ct_general.use))
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
continue;
cb->args[0] = cpu;
cb->args[1] = (unsigned long)ct;
@@ -1820,7 +1820,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
const struct nlattr *attr)
__must_hold(RCU)
{
- struct nf_nat_hook *nat_hook;
+ const struct nf_nat_hook *nat_hook;
int err;
nat_hook = rcu_dereference(nf_nat_hook);
@@ -2922,7 +2922,7 @@ static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff);
}
-static struct nfnl_ct_hook ctnetlink_glue_hook = {
+static const struct nfnl_ct_hook ctnetlink_glue_hook = {
.build_size = ctnetlink_glue_build_size,
.build = ctnetlink_glue_build,
.parse = ctnetlink_glue_parse,
@@ -2996,7 +2996,7 @@ static const union nf_inet_addr any_addr;
static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
{
- static __read_mostly siphash_key_t exp_id_seed;
+ static siphash_aligned_key_t exp_id_seed;
unsigned long a, b, c, d;
net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 80f675d884b2..3e1afd10a9b6 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -303,7 +303,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
int ret = 0;
WARN_ON(!ct);
- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
return 0;
if (nf_ct_should_gc(ct)) {
@@ -370,7 +370,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
ct_show_delta_time(s, ct);
- seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
+ seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use));
if (seq_has_overflowed(s))
goto release;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index ed37bb9b4e58..b90eca7a2f22 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -48,7 +48,7 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
struct flow_offload *flow;
if (unlikely(nf_ct_is_dying(ct) ||
- !atomic_inc_not_zero(&ct->ct_general.use)))
+ !refcount_inc_not_zero(&ct->ct_general.use)))
return NULL;
flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index bc4126d8ef65..5c57ade6bd05 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -54,8 +54,30 @@ static struct nf_flowtable_type flowtable_inet = {
.owner = THIS_MODULE,
};
+static struct nf_flowtable_type flowtable_ipv4 = {
+ .family = NFPROTO_IPV4,
+ .init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_ipv4,
+ .free = nf_flow_table_free,
+ .hook = nf_flow_offload_ip_hook,
+ .owner = THIS_MODULE,
+};
+
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .init = nf_flow_table_init,
+ .setup = nf_flow_table_offload_setup,
+ .action = nf_flow_rule_route_ipv6,
+ .free = nf_flow_table_free,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
static int __init nf_flow_inet_module_init(void)
{
+ nft_register_flowtable_type(&flowtable_ipv4);
+ nft_register_flowtable_type(&flowtable_ipv6);
nft_register_flowtable_type(&flowtable_inet);
return 0;
@@ -64,6 +86,8 @@ static int __init nf_flow_inet_module_init(void)
static void __exit nf_flow_inet_module_exit(void)
{
nft_unregister_flowtable_type(&flowtable_inet);
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+ nft_unregister_flowtable_type(&flowtable_ipv4);
}
module_init(nf_flow_inet_module_init);
@@ -71,5 +95,7 @@ module_exit(nf_flow_inet_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET);
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
MODULE_ALIAS_NF_FLOWTABLE(1); /* NFPROTO_INET */
MODULE_DESCRIPTION("Netfilter flow table mixed IPv4/IPv6 module");
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 4d50d51db796..2d06a66899b2 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -34,7 +34,7 @@ static unsigned int nat_net_id __read_mostly;
static struct hlist_head *nf_nat_bysource __read_mostly;
static unsigned int nf_nat_htable_size __read_mostly;
-static siphash_key_t nf_nat_hash_rnd __read_mostly;
+static siphash_aligned_key_t nf_nat_hash_rnd;
struct nf_nat_lookup_hook_priv {
struct nf_hook_entries __rcu *entries;
@@ -494,6 +494,38 @@ another_round:
goto another_round;
}
+static bool tuple_force_port_remap(const struct nf_conntrack_tuple *tuple)
+{
+ u16 sp, dp;
+
+ switch (tuple->dst.protonum) {
+ case IPPROTO_TCP:
+ sp = ntohs(tuple->src.u.tcp.port);
+ dp = ntohs(tuple->dst.u.tcp.port);
+ break;
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ sp = ntohs(tuple->src.u.udp.port);
+ dp = ntohs(tuple->dst.u.udp.port);
+ break;
+ default:
+ return false;
+ }
+
+ /* IANA: System port range: 1-1023,
+ * user port range: 1024-49151,
+ * private port range: 49152-65535.
+ *
+ * Linux default ephemeral port range is 32768-60999.
+ *
+ * Enforce port remapping if sport is significantly lower
+ * than dport to prevent NAT port shadowing, i.e.
+ * accidental match of 'new' inbound connection vs.
+ * existing outbound one.
+ */
+ return sp < 16384 && dp >= 32768;
+}
+
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
* we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
@@ -507,11 +539,17 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
+ bool random_port = range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL;
const struct nf_conntrack_zone *zone;
struct net *net = nf_ct_net(ct);
zone = nf_ct_zone(ct);
+ if (maniptype == NF_NAT_MANIP_SRC &&
+ !random_port &&
+ !ct->local_origin)
+ random_port = tuple_force_port_remap(orig_tuple);
+
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
* range, use that.
@@ -520,8 +558,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
* So far, we don't do local source mappings, so multiple
* manips not an issue.
*/
- if (maniptype == NF_NAT_MANIP_SRC &&
- !(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
+ if (maniptype == NF_NAT_MANIP_SRC && !random_port) {
/* try the original tuple first */
if (in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -545,7 +582,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
*/
/* Only bother mapping if it's not already in range and unique */
- if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
+ if (!random_port) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
l4proto_in_range(tuple, maniptype,
@@ -1130,7 +1167,7 @@ static struct pernet_operations nat_net_ops = {
.size = sizeof(struct nat_net),
};
-static struct nf_nat_hook nat_hook = {
+static const struct nf_nat_hook nat_hook = {
.parse_nat_setup = nfnetlink_parse_nat_setup,
#ifdef CONFIG_XFRM
.decode_session = __nf_nat_decode_session,
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index acd73f717a08..e32fac374608 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -12,6 +12,7 @@
struct masq_dev_work {
struct work_struct work;
struct net *net;
+ netns_tracker ns_tracker;
union nf_inet_addr addr;
int ifindex;
int (*iter)(struct nf_conn *i, void *data);
@@ -82,7 +83,7 @@ static void iterate_cleanup_work(struct work_struct *work)
nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
- put_net(w->net);
+ put_net_track(w->net, &w->ns_tracker);
kfree(w);
atomic_dec(&masq_worker_count);
module_put(THIS_MODULE);
@@ -119,6 +120,7 @@ static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = ifindex;
w->net = net;
+ netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
w->iter = iter;
if (addr)
w->addr = *addr;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 3d6d49420db8..2dfc5dae0656 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -349,7 +349,6 @@ static int __net_init synproxy_net_init(struct net *net)
goto err2;
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
- nf_conntrack_get(&ct->ct_general);
snet->tmpl = ct;
snet->stats = alloc_percpu(struct synproxy_stats);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c20772822637..eb12fc9b803d 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1747,16 +1747,16 @@ static void nft_chain_stats_replace(struct nft_trans *trans)
static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
{
- struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0);
- struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1);
+ struct nft_rule_blob *g0 = rcu_dereference_raw(chain->blob_gen_0);
+ struct nft_rule_blob *g1 = rcu_dereference_raw(chain->blob_gen_1);
if (g0 != g1)
kvfree(g1);
kvfree(g0);
/* should be NULL either via abort or via successful commit */
- WARN_ON_ONCE(chain->rules_next);
- kvfree(chain->rules_next);
+ WARN_ON_ONCE(chain->blob_next);
+ kvfree(chain->blob_next);
}
void nf_tables_chain_destroy(struct nft_ctx *ctx)
@@ -2002,23 +2002,39 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
struct nft_rules_old {
struct rcu_head h;
- struct nft_rule **start;
+ struct nft_rule_blob *blob;
};
-static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain,
- unsigned int alloc)
+static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr)
{
- if (alloc > INT_MAX)
+ struct nft_rule_dp *prule;
+
+ prule = (struct nft_rule_dp *)ptr;
+ prule->is_last = 1;
+ ptr += offsetof(struct nft_rule_dp, data);
+ /* blob size does not include the trailer rule */
+}
+
+static struct nft_rule_blob *nf_tables_chain_alloc_rules(unsigned int size)
+{
+ struct nft_rule_blob *blob;
+
+ /* size must include room for the last rule */
+ if (size < offsetof(struct nft_rule_dp, data))
+ return NULL;
+
+ size += sizeof(struct nft_rule_blob) + sizeof(struct nft_rules_old);
+ if (size > INT_MAX)
return NULL;
- alloc += 1; /* NULL, ends rules */
- if (sizeof(struct nft_rule *) > INT_MAX / alloc)
+ blob = kvmalloc(size, GFP_KERNEL);
+ if (!blob)
return NULL;
- alloc *= sizeof(struct nft_rule *);
- alloc += sizeof(struct nft_rules_old);
+ blob->size = 0;
+ nft_last_rule(blob, blob->data);
- return kvmalloc(alloc, GFP_KERNEL);
+ return blob;
}
static void nft_basechain_hook_init(struct nf_hook_ops *ops, u8 family,
@@ -2091,9 +2107,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
+ struct nft_rule_blob *blob;
struct nft_trans *trans;
struct nft_chain *chain;
- struct nft_rule **rules;
+ unsigned int data_size;
int err;
if (table->use == UINT_MAX)
@@ -2178,15 +2195,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]);
}
- rules = nf_tables_chain_alloc_rules(chain, 0);
- if (!rules) {
+ data_size = offsetof(struct nft_rule_dp, data); /* last rule */
+ blob = nf_tables_chain_alloc_rules(data_size);
+ if (!blob) {
err = -ENOMEM;
goto err_destroy_chain;
}
- *rules = NULL;
- rcu_assign_pointer(chain->rules_gen_0, rules);
- rcu_assign_pointer(chain->rules_gen_1, rules);
+ RCU_INIT_POINTER(chain->blob_gen_0, blob);
+ RCU_INIT_POINTER(chain->blob_gen_1, blob);
err = nf_tables_register_hook(net, table, chain);
if (err < 0)
@@ -8241,32 +8258,83 @@ EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
{
+ const struct nft_expr *expr, *last;
+ struct nft_regs_track track = {};
+ unsigned int size, data_size;
+ void *data, *data_boundary;
+ struct nft_rule_dp *prule;
struct nft_rule *rule;
- unsigned int alloc = 0;
int i;
/* already handled or inactive chain? */
- if (chain->rules_next || !nft_is_active_next(net, chain))
+ if (chain->blob_next || !nft_is_active_next(net, chain))
return 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
i = 0;
list_for_each_entry_continue(rule, &chain->rules, list) {
- if (nft_is_active_next(net, rule))
- alloc++;
+ if (nft_is_active_next(net, rule)) {
+ data_size += sizeof(*prule) + rule->dlen;
+ if (data_size > INT_MAX)
+ return -ENOMEM;
+ }
}
+ data_size += offsetof(struct nft_rule_dp, data); /* last rule */
- chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc);
- if (!chain->rules_next)
+ chain->blob_next = nf_tables_chain_alloc_rules(data_size);
+ if (!chain->blob_next)
return -ENOMEM;
+ data = (void *)chain->blob_next->data;
+ data_boundary = data + data_size;
+ size = 0;
+
list_for_each_entry_continue(rule, &chain->rules, list) {
- if (nft_is_active_next(net, rule))
- chain->rules_next[i++] = rule;
+ if (!nft_is_active_next(net, rule))
+ continue;
+
+ prule = (struct nft_rule_dp *)data;
+ data += offsetof(struct nft_rule_dp, data);
+ if (WARN_ON_ONCE(data > data_boundary))
+ return -ENOMEM;
+
+ size = 0;
+ track.last = last;
+ nft_rule_for_each_expr(expr, last, rule) {
+ track.cur = expr;
+
+ if (expr->ops->reduce &&
+ expr->ops->reduce(&track, expr)) {
+ expr = track.cur;
+ continue;
+ }
+
+ if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
+ return -ENOMEM;
+
+ memcpy(data + size, expr, expr->ops->size);
+ size += expr->ops->size;
+ }
+ if (WARN_ON_ONCE(size >= 1 << 12))
+ return -ENOMEM;
+
+ prule->handle = rule->handle;
+ prule->dlen = size;
+ prule->is_last = 0;
+
+ data += size;
+ size = 0;
+ chain->blob_next->size += (unsigned long)(data - (void *)prule);
}
- chain->rules_next[i] = NULL;
+ prule = (struct nft_rule_dp *)data;
+ data += offsetof(struct nft_rule_dp, data);
+ if (WARN_ON_ONCE(data > data_boundary))
+ return -ENOMEM;
+
+ nft_last_rule(chain->blob_next, prule);
+
return 0;
}
@@ -8280,8 +8348,8 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net)
if (trans->msg_type == NFT_MSG_NEWRULE ||
trans->msg_type == NFT_MSG_DELRULE) {
- kvfree(chain->rules_next);
- chain->rules_next = NULL;
+ kvfree(chain->blob_next);
+ chain->blob_next = NULL;
}
}
}
@@ -8290,38 +8358,34 @@ static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h)
{
struct nft_rules_old *o = container_of(h, struct nft_rules_old, h);
- kvfree(o->start);
+ kvfree(o->blob);
}
-static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
+static void nf_tables_commit_chain_free_rules_old(struct nft_rule_blob *blob)
{
- struct nft_rule **r = rules;
struct nft_rules_old *old;
- while (*r)
- r++;
-
- r++; /* rcu_head is after end marker */
- old = (void *) r;
- old->start = rules;
+ /* rcu_head is after end marker */
+ old = (void *)blob + sizeof(*blob) + blob->size;
+ old->blob = blob;
call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
}
static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
{
- struct nft_rule **g0, **g1;
+ struct nft_rule_blob *g0, *g1;
bool next_genbit;
next_genbit = nft_gencursor_next(net);
- g0 = rcu_dereference_protected(chain->rules_gen_0,
+ g0 = rcu_dereference_protected(chain->blob_gen_0,
lockdep_commit_lock_is_held(net));
- g1 = rcu_dereference_protected(chain->rules_gen_1,
+ g1 = rcu_dereference_protected(chain->blob_gen_1,
lockdep_commit_lock_is_held(net));
/* No changes to this chain? */
- if (chain->rules_next == NULL) {
+ if (chain->blob_next == NULL) {
/* chain had no change in last or next generation */
if (g0 == g1)
return;
@@ -8330,10 +8394,10 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
* one uses same rules as current generation.
*/
if (next_genbit) {
- rcu_assign_pointer(chain->rules_gen_1, g0);
+ rcu_assign_pointer(chain->blob_gen_1, g0);
nf_tables_commit_chain_free_rules_old(g1);
} else {
- rcu_assign_pointer(chain->rules_gen_0, g1);
+ rcu_assign_pointer(chain->blob_gen_0, g1);
nf_tables_commit_chain_free_rules_old(g0);
}
@@ -8341,11 +8405,11 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
}
if (next_genbit)
- rcu_assign_pointer(chain->rules_gen_1, chain->rules_next);
+ rcu_assign_pointer(chain->blob_gen_1, chain->blob_next);
else
- rcu_assign_pointer(chain->rules_gen_0, chain->rules_next);
+ rcu_assign_pointer(chain->blob_gen_0, chain->blob_next);
- chain->rules_next = NULL;
+ chain->blob_next = NULL;
if (g0 == g1)
return;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index adc348056076..36e73f9828c5 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -38,7 +38,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
static inline void nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
- const struct nft_rule *rule,
+ const struct nft_rule_dp *rule,
enum nft_trace_types type)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
@@ -67,6 +67,36 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
+static noinline void __nft_trace_verdict(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ const struct nft_regs *regs)
+{
+ enum nft_trace_types type;
+
+ switch (regs->verdict.code) {
+ case NFT_CONTINUE:
+ case NFT_RETURN:
+ type = NFT_TRACETYPE_RETURN;
+ break;
+ default:
+ type = NFT_TRACETYPE_RULE;
+ break;
+ }
+
+ __nft_trace_packet(info, chain, type);
+}
+
+static inline void nft_trace_verdict(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ const struct nft_rule_dp *rule,
+ const struct nft_regs *regs)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->rule = rule;
+ __nft_trace_verdict(info, chain, regs);
+ }
+}
+
static bool nft_payload_fast_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -110,7 +140,6 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
base_chain = nft_base_chain(chain);
- rcu_read_lock();
pstats = READ_ONCE(base_chain->stats);
if (pstats) {
local_bh_disable();
@@ -121,12 +150,12 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
u64_stats_update_end(&stats->syncp);
local_bh_enable();
}
- rcu_read_unlock();
}
struct nft_jumpstack {
- const struct nft_chain *chain;
- struct nft_rule *const *rules;
+ const struct nft_chain *chain;
+ const struct nft_rule_dp *rule;
+ const struct nft_rule_dp *last_rule;
};
static void expr_call_ops_eval(const struct nft_expr *expr,
@@ -141,6 +170,7 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
X(e, nft_payload_eval);
X(e, nft_cmp_eval);
+ X(e, nft_counter_eval);
X(e, nft_meta_get_eval);
X(e, nft_lookup_eval);
X(e, nft_range_eval);
@@ -154,18 +184,28 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
expr->ops->eval(expr, regs, pkt);
}
+#define nft_rule_expr_first(rule) (struct nft_expr *)&rule->data[0]
+#define nft_rule_expr_next(expr) ((void *)expr) + expr->ops->size
+#define nft_rule_expr_last(rule) (struct nft_expr *)&rule->data[rule->dlen]
+#define nft_rule_next(rule) (void *)rule + sizeof(*rule) + rule->dlen
+
+#define nft_rule_dp_for_each_expr(expr, last, rule) \
+ for ((expr) = nft_rule_expr_first(rule), (last) = nft_rule_expr_last(rule); \
+ (expr) != (last); \
+ (expr) = nft_rule_expr_next(expr))
+
unsigned int
nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{
const struct nft_chain *chain = priv, *basechain = chain;
+ const struct nft_rule_dp *rule, *last_rule;
const struct net *net = nft_net(pkt);
- struct nft_rule *const *rules;
- const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
+ struct nft_rule_blob *blob;
struct nft_traceinfo info;
info.trace = false;
@@ -173,16 +213,16 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
nft_trace_init(&info, pkt, &regs.verdict, basechain);
do_chain:
if (genbit)
- rules = rcu_dereference(chain->rules_gen_1);
+ blob = rcu_dereference(chain->blob_gen_1);
else
- rules = rcu_dereference(chain->rules_gen_0);
+ blob = rcu_dereference(chain->blob_gen_0);
+ rule = (struct nft_rule_dp *)blob->data;
+ last_rule = (void *)blob->data + blob->size;
next_rule:
- rule = *rules;
regs.verdict.code = NFT_CONTINUE;
- for (; *rules ; rules++) {
- rule = *rules;
- nft_rule_for_each_expr(expr, last, rule) {
+ for (; rule < last_rule; rule = nft_rule_next(rule)) {
+ nft_rule_dp_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, &regs);
else if (expr->ops == &nft_bitwise_fast_ops)
@@ -207,13 +247,13 @@ next_rule:
break;
}
+ nft_trace_verdict(&info, chain, rule, &regs);
+
switch (regs.verdict.code & NF_VERDICT_MASK) {
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
case NF_STOLEN:
- nft_trace_packet(&info, chain, rule,
- NFT_TRACETYPE_RULE);
return regs.verdict.code;
}
@@ -222,28 +262,25 @@ next_rule:
if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
return NF_DROP;
jumpstack[stackptr].chain = chain;
- jumpstack[stackptr].rules = rules + 1;
+ jumpstack[stackptr].rule = nft_rule_next(rule);
+ jumpstack[stackptr].last_rule = last_rule;
stackptr++;
fallthrough;
case NFT_GOTO:
- nft_trace_packet(&info, chain, rule,
- NFT_TRACETYPE_RULE);
-
chain = regs.verdict.chain;
goto do_chain;
case NFT_CONTINUE:
case NFT_RETURN:
- nft_trace_packet(&info, chain, rule,
- NFT_TRACETYPE_RETURN);
break;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
}
if (stackptr > 0) {
stackptr--;
chain = jumpstack[stackptr].chain;
- rules = jumpstack[stackptr].rules;
+ rule = jumpstack[stackptr].rule;
+ last_rule = jumpstack[stackptr].last_rule;
goto next_rule;
}
@@ -269,18 +306,22 @@ static struct nft_expr_type *nft_basic_types[] = {
&nft_rt_type,
&nft_exthdr_type,
&nft_last_type,
+ &nft_counter_type,
};
static struct nft_object_type *nft_basic_objects[] = {
#ifdef CONFIG_NETWORK_SECMARK
&nft_secmark_obj_type,
#endif
+ &nft_counter_obj_type,
};
int __init nf_tables_core_module_init(void)
{
int err, i, j = 0;
+ nft_counter_init_seqcount();
+
for (i = 0; i < ARRAY_SIZE(nft_basic_objects); i++) {
err = nft_register_obj(nft_basic_objects[i]);
if (err)
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 84a7dea46efa..5041725423c2 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -142,7 +142,7 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
const struct nft_traceinfo *info)
{
- if (!info->rule)
+ if (!info->rule || info->rule->is_last)
return 0;
/* a continue verdict with ->type == RETURN means that this is
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index d5c719c9e36c..71e29adac48b 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -6,6 +6,7 @@
*/
#include <linux/module.h>
+#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/skbuff.h>
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 7f83f9697fc1..ae9c0756bba5 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -66,6 +66,7 @@ struct nfulnl_instance {
struct sk_buff *skb; /* pre-allocatd skb */
struct timer_list timer;
struct net *net;
+ netns_tracker ns_tracker;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
u32 peer_portid; /* PORTID of the peer process */
@@ -140,7 +141,7 @@ static void nfulnl_instance_free_rcu(struct rcu_head *head)
struct nfulnl_instance *inst =
container_of(head, struct nfulnl_instance, rcu);
- put_net(inst->net);
+ put_net_track(inst->net, &inst->ns_tracker);
kfree(inst);
module_put(THIS_MODULE);
}
@@ -187,7 +188,7 @@ instance_create(struct net *net, u_int16_t group_num,
timer_setup(&inst->timer, nfulnl_timer, 0);
- inst->net = get_net(net);
+ inst->net = get_net_track(net, &inst->ns_tracker, GFP_ATOMIC);
inst->peer_user_ns = user_ns;
inst->peer_portid = portid;
inst->group_num = group_num;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f0b9e21a2452..ea2d9c2a44cf 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -225,7 +225,7 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
- struct nf_ct_hook *ct_hook;
+ const struct nf_ct_hook *ct_hook;
int err;
if (verdict == NF_ACCEPT ||
@@ -388,7 +388,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct net_device *outdev;
struct nf_conn *ct = NULL;
enum ip_conntrack_info ctinfo = 0;
- struct nfnl_ct_hook *nfnl_ct;
+ const struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;
u32 seclen = 0;
@@ -1104,7 +1104,7 @@ static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
return 0;
}
-static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
+static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct,
const struct nlmsghdr *nlh,
const struct nlattr * const nfqa[],
struct nf_queue_entry *entry,
@@ -1171,11 +1171,11 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
{
struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
u_int16_t queue_num = ntohs(info->nfmsg->res_id);
+ const struct nfnl_ct_hook *nfnl_ct;
struct nfqnl_msg_verdict_hdr *vhdr;
enum ip_conntrack_info ctinfo;
struct nfqnl_instance *queue;
struct nf_queue_entry *entry;
- struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
unsigned int verdict;
int err;
@@ -1528,15 +1528,9 @@ static void __net_exit nfnl_queue_net_exit(struct net *net)
WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
}
-static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list)
-{
- synchronize_rcu();
-}
-
static struct pernet_operations nfnl_queue_net_ops = {
.init = nfnl_queue_net_init,
.exit = nfnl_queue_net_exit,
- .exit_batch = nfnl_queue_net_exit_batch,
.id = &nfnl_queue_net_id,
.size = sizeof(struct nfnl_queue_net),
};
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 47b0dba95054..7b727d3ebf9d 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -278,12 +278,52 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static bool nft_bitwise_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_bitwise *priv = nft_expr_priv(expr);
+ const struct nft_bitwise *bitwise;
+
+ if (!track->regs[priv->sreg].selector)
+ return false;
+
+ bitwise = nft_expr_priv(expr);
+ if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
+ track->regs[priv->dreg].bitwise &&
+ track->regs[priv->dreg].bitwise->ops == expr->ops &&
+ priv->sreg == bitwise->sreg &&
+ priv->dreg == bitwise->dreg &&
+ priv->op == bitwise->op &&
+ priv->len == bitwise->len &&
+ !memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) &&
+ !memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) &&
+ !memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) {
+ track->cur = expr;
+ return true;
+ }
+
+ if (track->regs[priv->sreg].bitwise) {
+ track->regs[priv->dreg].selector = NULL;
+ track->regs[priv->dreg].bitwise = NULL;
+ return false;
+ }
+
+ if (priv->sreg != priv->dreg) {
+ track->regs[priv->dreg].selector =
+ track->regs[priv->sreg].selector;
+ }
+ track->regs[priv->dreg].bitwise = expr;
+
+ return false;
+}
+
static const struct nft_expr_ops nft_bitwise_ops = {
.type = &nft_bitwise_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
.eval = nft_bitwise_eval,
.init = nft_bitwise_init,
.dump = nft_bitwise_dump,
+ .reduce = nft_bitwise_reduce,
.offload = nft_bitwise_offload,
};
@@ -385,12 +425,49 @@ static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static bool nft_bitwise_fast_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr);
+ const struct nft_bitwise_fast_expr *bitwise;
+
+ if (!track->regs[priv->sreg].selector)
+ return false;
+
+ bitwise = nft_expr_priv(expr);
+ if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector &&
+ track->regs[priv->dreg].bitwise &&
+ track->regs[priv->dreg].bitwise->ops == expr->ops &&
+ priv->sreg == bitwise->sreg &&
+ priv->dreg == bitwise->dreg &&
+ priv->mask == bitwise->mask &&
+ priv->xor == bitwise->xor) {
+ track->cur = expr;
+ return true;
+ }
+
+ if (track->regs[priv->sreg].bitwise) {
+ track->regs[priv->dreg].selector = NULL;
+ track->regs[priv->dreg].bitwise = NULL;
+ return false;
+ }
+
+ if (priv->sreg != priv->dreg) {
+ track->regs[priv->dreg].selector =
+ track->regs[priv->sreg].selector;
+ }
+ track->regs[priv->dreg].bitwise = expr;
+
+ return false;
+}
+
const struct nft_expr_ops nft_bitwise_fast_ops = {
.type = &nft_bitwise_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)),
.eval = NULL, /* inlined */
.init = nft_bitwise_fast_init,
.dump = nft_bitwise_fast_dump,
+ .reduce = nft_bitwise_fast_reduce,
.offload = nft_bitwise_fast_offload,
};
@@ -427,3 +504,21 @@ struct nft_expr_type nft_bitwise_type __read_mostly = {
.maxattr = NFTA_BITWISE_MAX,
.owner = THIS_MODULE,
};
+
+bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_expr *last = track->last;
+ const struct nft_expr *next;
+
+ if (expr == last)
+ return false;
+
+ next = nft_expr_next(expr);
+ if (next->ops == &nft_bitwise_ops)
+ return nft_bitwise_reduce(track, next);
+ else if (next->ops == &nft_bitwise_fast_ops)
+ return nft_bitwise_fast_reduce(track, next);
+
+ return false;
+}
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
index 7d0761fad37e..58dcafe8bf79 100644
--- a/net/netfilter/nft_connlimit.c
+++ b/net/netfilter/nft_connlimit.c
@@ -14,7 +14,7 @@
#include <net/netfilter/nf_conntrack_zones.h>
struct nft_connlimit {
- struct nf_conncount_list list;
+ struct nf_conncount_list *list;
u32 limit;
bool invert;
};
@@ -43,12 +43,12 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
return;
}
- if (nf_conncount_add(nft_net(pkt), &priv->list, tuple_ptr, zone)) {
+ if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
regs->verdict.code = NF_DROP;
return;
}
- count = priv->list.count;
+ count = priv->list->count;
if ((count > priv->limit) ^ priv->invert) {
regs->verdict.code = NFT_BREAK;
@@ -76,7 +76,11 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx,
invert = true;
}
- nf_conncount_list_init(&priv->list);
+ priv->list = kmalloc(sizeof(*priv->list), GFP_KERNEL);
+ if (!priv->list)
+ return -ENOMEM;
+
+ nf_conncount_list_init(priv->list);
priv->limit = limit;
priv->invert = invert;
@@ -87,7 +91,8 @@ static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
struct nft_connlimit *priv)
{
nf_ct_netns_put(ctx->net, ctx->family);
- nf_conncount_cache_free(&priv->list);
+ nf_conncount_cache_free(priv->list);
+ kfree(priv->list);
}
static int nft_connlimit_do_dump(struct sk_buff *skb,
@@ -200,7 +205,11 @@ static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
struct nft_connlimit *priv_dst = nft_expr_priv(dst);
struct nft_connlimit *priv_src = nft_expr_priv(src);
- nf_conncount_list_init(&priv_dst->list);
+ priv_dst->list = kmalloc(sizeof(*priv_dst->list), GFP_ATOMIC);
+ if (priv_dst->list)
+ return -ENOMEM;
+
+ nf_conncount_list_init(priv_dst->list);
priv_dst->limit = priv_src->limit;
priv_dst->invert = priv_src->invert;
@@ -212,7 +221,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
{
struct nft_connlimit *priv = nft_expr_priv(expr);
- nf_conncount_cache_free(&priv->list);
+ nf_conncount_cache_free(priv->list);
+ kfree(priv->list);
}
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
@@ -221,7 +231,7 @@ static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
bool ret;
local_bh_disable();
- ret = nf_conncount_gc_list(net, &priv->list);
+ ret = nf_conncount_gc_list(net, priv->list);
local_bh_enable();
return ret;
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 8edd3b3c173d..f179e8c3b0ca 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -13,6 +13,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables_offload.h>
struct nft_counter {
@@ -174,7 +175,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
[NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
};
-static struct nft_object_type nft_counter_obj_type;
+struct nft_object_type nft_counter_obj_type;
static const struct nft_object_ops nft_counter_obj_ops = {
.type = &nft_counter_obj_type,
.size = sizeof(struct nft_counter_percpu_priv),
@@ -184,7 +185,7 @@ static const struct nft_object_ops nft_counter_obj_ops = {
.dump = nft_counter_obj_dump,
};
-static struct nft_object_type nft_counter_obj_type __read_mostly = {
+struct nft_object_type nft_counter_obj_type __read_mostly = {
.type = NFT_OBJECT_COUNTER,
.ops = &nft_counter_obj_ops,
.maxattr = NFTA_COUNTER_MAX,
@@ -192,9 +193,8 @@ static struct nft_object_type nft_counter_obj_type __read_mostly = {
.owner = THIS_MODULE,
};
-static void nft_counter_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
@@ -275,7 +275,15 @@ static void nft_counter_offload_stats(struct nft_expr *expr,
preempt_enable();
}
-static struct nft_expr_type nft_counter_type;
+void nft_counter_init_seqcount(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
+}
+
+struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
@@ -289,7 +297,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.offload_stats = nft_counter_offload_stats,
};
-static struct nft_expr_type nft_counter_type __read_mostly = {
+struct nft_expr_type nft_counter_type __read_mostly = {
.name = "counter",
.ops = &nft_counter_ops,
.policy = nft_counter_policy,
@@ -297,39 +305,3 @@ static struct nft_expr_type nft_counter_type __read_mostly = {
.flags = NFT_EXPR_STATEFUL,
.owner = THIS_MODULE,
};
-
-static int __init nft_counter_module_init(void)
-{
- int cpu, err;
-
- for_each_possible_cpu(cpu)
- seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu));
-
- err = nft_register_obj(&nft_counter_obj_type);
- if (err < 0)
- return err;
-
- err = nft_register_expr(&nft_counter_type);
- if (err < 0)
- goto err1;
-
- return 0;
-err1:
- nft_unregister_obj(&nft_counter_obj_type);
- return err;
-}
-
-static void __exit nft_counter_module_exit(void)
-{
- nft_unregister_expr(&nft_counter_type);
- nft_unregister_obj(&nft_counter_obj_type);
-}
-
-module_init(nft_counter_module_init);
-module_exit(nft_counter_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("counter");
-MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_COUNTER);
-MODULE_DESCRIPTION("nftables counter rule support");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 99b1de14ff7e..518d96c8c247 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -259,7 +259,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
ct = this_cpu_read(nft_ct_pcpu_template);
- if (likely(atomic_read(&ct->ct_general.use) == 1)) {
+ if (likely(refcount_read(&ct->ct_general.use) == 1)) {
nf_ct_zone_add(ct, &zone);
} else {
/* previous skb got queued to userspace */
@@ -270,7 +270,6 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
}
}
- atomic_inc(&ct->ct_general.use);
nf_ct_set(skb, ct, IP_CT_NEW);
}
#endif
@@ -375,7 +374,6 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
- atomic_set(&tmp->ct_general.use, 1);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index cd59afde5b2f..fa9301ca6033 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -27,9 +27,11 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
{
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
int oif = regs->data[priv->sreg_dev];
+ struct sk_buff *skb = pkt->skb;
/* This is used by ifb only. */
- skb_set_redirected(pkt->skb, true);
+ skb->skb_iif = skb->dev->ifindex;
+ skb_set_redirected(skb, nft_hook(pkt) == NF_NETDEV_INGRESS);
nf_fwd_netdev_egress(pkt, oif);
regs->verdict.code = NF_STOLEN;
@@ -198,7 +200,8 @@ static int nft_fwd_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
- return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
+ return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS) |
+ (1 << NF_NETDEV_EGRESS));
}
static struct nft_expr_type nft_fwd_netdev_type;
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 304e33cbed9b..5ee33d0ccd4e 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -8,9 +8,13 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+struct nft_last {
+ unsigned long jiffies;
+ unsigned int set;
+};
+
struct nft_last_priv {
- unsigned long last_jiffies;
- unsigned int last_set;
+ struct nft_last *last;
};
static const struct nla_policy nft_last_policy[NFTA_LAST_MAX + 1] = {
@@ -22,47 +26,55 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[])
{
struct nft_last_priv *priv = nft_expr_priv(expr);
+ struct nft_last *last;
u64 last_jiffies;
- u32 last_set = 0;
int err;
- if (tb[NFTA_LAST_SET]) {
- last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
- if (last_set == 1)
- priv->last_set = 1;
- }
+ last = kzalloc(sizeof(*last), GFP_KERNEL);
+ if (!last)
+ return -ENOMEM;
+
+ if (tb[NFTA_LAST_SET])
+ last->set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
- if (last_set && tb[NFTA_LAST_MSECS]) {
+ if (last->set && tb[NFTA_LAST_MSECS]) {
err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
if (err < 0)
- return err;
+ goto err;
- priv->last_jiffies = jiffies - (unsigned long)last_jiffies;
+ last->jiffies = jiffies - (unsigned long)last_jiffies;
}
+ priv->last = last;
return 0;
+err:
+ kfree(last);
+
+ return err;
}
static void nft_last_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
struct nft_last_priv *priv = nft_expr_priv(expr);
+ struct nft_last *last = priv->last;
- if (READ_ONCE(priv->last_jiffies) != jiffies)
- WRITE_ONCE(priv->last_jiffies, jiffies);
- if (READ_ONCE(priv->last_set) == 0)
- WRITE_ONCE(priv->last_set, 1);
+ if (READ_ONCE(last->jiffies) != jiffies)
+ WRITE_ONCE(last->jiffies, jiffies);
+ if (READ_ONCE(last->set) == 0)
+ WRITE_ONCE(last->set, 1);
}
static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
struct nft_last_priv *priv = nft_expr_priv(expr);
- unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
- u32 last_set = READ_ONCE(priv->last_set);
+ struct nft_last *last = priv->last;
+ unsigned long last_jiffies = READ_ONCE(last->jiffies);
+ u32 last_set = READ_ONCE(last->set);
__be64 msecs;
if (time_before(jiffies, last_jiffies)) {
- WRITE_ONCE(priv->last_set, 0);
+ WRITE_ONCE(last->set, 0);
last_set = 0;
}
@@ -81,11 +93,32 @@ nla_put_failure:
return -1;
}
+static void nft_last_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_last_priv *priv = nft_expr_priv(expr);
+
+ kfree(priv->last);
+}
+
+static int nft_last_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_last_priv *priv_dst = nft_expr_priv(dst);
+
+ priv_dst->last = kzalloc(sizeof(*priv_dst->last), GFP_ATOMIC);
+ if (priv_dst->last)
+ return -ENOMEM;
+
+ return 0;
+}
+
static const struct nft_expr_ops nft_last_ops = {
.type = &nft_last_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_last_priv)),
.eval = nft_last_eval,
.init = nft_last_init,
+ .destroy = nft_last_destroy,
+ .clone = nft_last_clone,
.dump = nft_last_dump,
};
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 82ec27bdf941..f04be5be73a0 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -18,6 +18,10 @@ struct nft_limit {
spinlock_t lock;
u64 last;
u64 tokens;
+};
+
+struct nft_limit_priv {
+ struct nft_limit *limit;
u64 tokens_max;
u64 rate;
u64 nsecs;
@@ -25,33 +29,33 @@ struct nft_limit {
bool invert;
};
-static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
+static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
{
u64 now, tokens;
s64 delta;
- spin_lock_bh(&limit->lock);
+ spin_lock_bh(&priv->limit->lock);
now = ktime_get_ns();
- tokens = limit->tokens + now - limit->last;
- if (tokens > limit->tokens_max)
- tokens = limit->tokens_max;
+ tokens = priv->limit->tokens + now - priv->limit->last;
+ if (tokens > priv->tokens_max)
+ tokens = priv->tokens_max;
- limit->last = now;
+ priv->limit->last = now;
delta = tokens - cost;
if (delta >= 0) {
- limit->tokens = delta;
- spin_unlock_bh(&limit->lock);
- return limit->invert;
+ priv->limit->tokens = delta;
+ spin_unlock_bh(&priv->limit->lock);
+ return priv->invert;
}
- limit->tokens = tokens;
- spin_unlock_bh(&limit->lock);
- return !limit->invert;
+ priv->limit->tokens = tokens;
+ spin_unlock_bh(&priv->limit->lock);
+ return !priv->invert;
}
/* Use same default as in iptables. */
#define NFT_LIMIT_PKT_BURST_DEFAULT 5
-static int nft_limit_init(struct nft_limit *limit,
+static int nft_limit_init(struct nft_limit_priv *priv,
const struct nlattr * const tb[], bool pkts)
{
u64 unit, tokens;
@@ -60,58 +64,62 @@ static int nft_limit_init(struct nft_limit *limit,
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
- limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
- limit->nsecs = unit * NSEC_PER_SEC;
- if (limit->rate == 0 || limit->nsecs < unit)
+ priv->nsecs = unit * NSEC_PER_SEC;
+ if (priv->rate == 0 || priv->nsecs < unit)
return -EOVERFLOW;
if (tb[NFTA_LIMIT_BURST])
- limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+ priv->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
- if (pkts && limit->burst == 0)
- limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
+ if (pkts && priv->burst == 0)
+ priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
- if (limit->rate + limit->burst < limit->rate)
+ if (priv->rate + priv->burst < priv->rate)
return -EOVERFLOW;
if (pkts) {
- tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst;
+ tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst;
} else {
/* The token bucket size limits the number of tokens can be
* accumulated. tokens_max specifies the bucket size.
* tokens_max = unit * (rate + burst) / rate.
*/
- tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst),
- limit->rate);
+ tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst),
+ priv->rate);
}
- limit->tokens = tokens;
- limit->tokens_max = limit->tokens;
+ priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL);
+ if (!priv->limit)
+ return -ENOMEM;
+
+ priv->limit->tokens = tokens;
+ priv->tokens_max = priv->limit->tokens;
if (tb[NFTA_LIMIT_FLAGS]) {
u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
if (flags & NFT_LIMIT_F_INV)
- limit->invert = true;
+ priv->invert = true;
}
- limit->last = ktime_get_ns();
- spin_lock_init(&limit->lock);
+ priv->limit->last = ktime_get_ns();
+ spin_lock_init(&priv->limit->lock);
return 0;
}
-static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit_priv *priv,
enum nft_limit_type type)
{
- u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0;
- u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
+ u32 flags = priv->invert ? NFT_LIMIT_F_INV : 0;
+ u64 secs = div_u64(priv->nsecs, NSEC_PER_SEC);
- if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate),
+ if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate),
NFTA_LIMIT_PAD) ||
nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs),
NFTA_LIMIT_PAD) ||
- nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
+ nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(priv->burst)) ||
nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)) ||
nla_put_be32(skb, NFTA_LIMIT_FLAGS, htonl(flags)))
goto nla_put_failure;
@@ -121,8 +129,34 @@ nla_put_failure:
return -1;
}
-struct nft_limit_pkts {
- struct nft_limit limit;
+static void nft_limit_destroy(const struct nft_ctx *ctx,
+ const struct nft_limit_priv *priv)
+{
+ kfree(priv->limit);
+}
+
+static int nft_limit_clone(struct nft_limit_priv *priv_dst,
+ const struct nft_limit_priv *priv_src)
+{
+ priv_dst->tokens_max = priv_src->tokens_max;
+ priv_dst->rate = priv_src->rate;
+ priv_dst->nsecs = priv_src->nsecs;
+ priv_dst->burst = priv_src->burst;
+ priv_dst->invert = priv_src->invert;
+
+ priv_dst->limit = kmalloc(sizeof(*priv_dst->limit), GFP_ATOMIC);
+ if (priv_dst->limit)
+ return -ENOMEM;
+
+ spin_lock_init(&priv_dst->limit->lock);
+ priv_dst->limit->tokens = priv_src->tokens_max;
+ priv_dst->limit->last = ktime_get_ns();
+
+ return 0;
+}
+
+struct nft_limit_priv_pkts {
+ struct nft_limit_priv limit;
u64 cost;
};
@@ -130,7 +164,7 @@ static void nft_limit_pkts_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_limit_pkts *priv = nft_expr_priv(expr);
+ struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
if (nft_limit_eval(&priv->limit, priv->cost))
regs->verdict.code = NFT_BREAK;
@@ -148,7 +182,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nft_limit_pkts *priv = nft_expr_priv(expr);
+ struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
int err;
err = nft_limit_init(&priv->limit, tb, true);
@@ -161,17 +195,35 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
- const struct nft_limit_pkts *priv = nft_expr_priv(expr);
+ const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
}
+static void nft_limit_pkts_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_limit_priv_pkts *priv = nft_expr_priv(expr);
+
+ nft_limit_destroy(ctx, &priv->limit);
+}
+
+static int nft_limit_pkts_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_limit_priv_pkts *priv_dst = nft_expr_priv(dst);
+ struct nft_limit_priv_pkts *priv_src = nft_expr_priv(src);
+
+ return nft_limit_clone(&priv_dst->limit, &priv_src->limit);
+}
+
static struct nft_expr_type nft_limit_type;
static const struct nft_expr_ops nft_limit_pkts_ops = {
.type = &nft_limit_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
.eval = nft_limit_pkts_eval,
.init = nft_limit_pkts_init,
+ .destroy = nft_limit_pkts_destroy,
+ .clone = nft_limit_pkts_clone,
.dump = nft_limit_pkts_dump,
};
@@ -179,7 +231,7 @@ static void nft_limit_bytes_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_limit *priv = nft_expr_priv(expr);
+ struct nft_limit_priv *priv = nft_expr_priv(expr);
u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
if (nft_limit_eval(priv, cost))
@@ -190,7 +242,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nft_limit *priv = nft_expr_priv(expr);
+ struct nft_limit_priv *priv = nft_expr_priv(expr);
return nft_limit_init(priv, tb, false);
}
@@ -198,17 +250,35 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
static int nft_limit_bytes_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
- const struct nft_limit *priv = nft_expr_priv(expr);
+ const struct nft_limit_priv *priv = nft_expr_priv(expr);
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
+static void nft_limit_bytes_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_limit_priv *priv = nft_expr_priv(expr);
+
+ nft_limit_destroy(ctx, priv);
+}
+
+static int nft_limit_bytes_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_limit_priv *priv_dst = nft_expr_priv(dst);
+ struct nft_limit_priv *priv_src = nft_expr_priv(src);
+
+ return nft_limit_clone(priv_dst, priv_src);
+}
+
static const struct nft_expr_ops nft_limit_bytes_ops = {
.type = &nft_limit_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv)),
.eval = nft_limit_bytes_eval,
.init = nft_limit_bytes_init,
.dump = nft_limit_bytes_dump,
+ .clone = nft_limit_bytes_clone,
+ .destroy = nft_limit_bytes_destroy,
};
static const struct nft_expr_ops *
@@ -240,7 +310,7 @@ static void nft_limit_obj_pkts_eval(struct nft_object *obj,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_limit_pkts *priv = nft_obj_data(obj);
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
if (nft_limit_eval(&priv->limit, priv->cost))
regs->verdict.code = NFT_BREAK;
@@ -250,7 +320,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
{
- struct nft_limit_pkts *priv = nft_obj_data(obj);
+ struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
int err;
err = nft_limit_init(&priv->limit, tb, true);
@@ -265,7 +335,7 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
struct nft_object *obj,
bool reset)
{
- const struct nft_limit_pkts *priv = nft_obj_data(obj);
+ const struct nft_limit_priv_pkts *priv = nft_obj_data(obj);
return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
}
@@ -273,7 +343,7 @@ static int nft_limit_obj_pkts_dump(struct sk_buff *skb,
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_pkts_ops = {
.type = &nft_limit_obj_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_priv_pkts)),
.init = nft_limit_obj_pkts_init,
.eval = nft_limit_obj_pkts_eval,
.dump = nft_limit_obj_pkts_dump,
@@ -283,7 +353,7 @@ static void nft_limit_obj_bytes_eval(struct nft_object *obj,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_limit *priv = nft_obj_data(obj);
+ struct nft_limit_priv *priv = nft_obj_data(obj);
u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate);
if (nft_limit_eval(priv, cost))
@@ -294,7 +364,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
{
- struct nft_limit *priv = nft_obj_data(obj);
+ struct nft_limit_priv *priv = nft_obj_data(obj);
return nft_limit_init(priv, tb, false);
}
@@ -303,7 +373,7 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
struct nft_object *obj,
bool reset)
{
- const struct nft_limit *priv = nft_obj_data(obj);
+ const struct nft_limit_priv *priv = nft_obj_data(obj);
return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
}
@@ -311,7 +381,7 @@ static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
static struct nft_object_type nft_limit_obj_type;
static const struct nft_object_ops nft_limit_obj_bytes_ops = {
.type = &nft_limit_obj_type,
- .size = sizeof(struct nft_limit),
+ .size = sizeof(struct nft_limit_priv),
.init = nft_limit_obj_bytes_init,
.eval = nft_limit_obj_bytes_eval,
.dump = nft_limit_obj_bytes_dump,
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index fe91ff5f8fbe..5ab4df56c945 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -750,16 +750,63 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
return 0;
}
+static bool nft_meta_get_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+ const struct nft_meta *meta;
+
+ if (!track->regs[priv->dreg].selector ||
+ track->regs[priv->dreg].selector->ops != expr->ops) {
+ track->regs[priv->dreg].selector = expr;
+ track->regs[priv->dreg].bitwise = NULL;
+ return false;
+ }
+
+ meta = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->key != meta->key ||
+ priv->dreg != meta->dreg) {
+ track->regs[priv->dreg].selector = expr;
+ track->regs[priv->dreg].bitwise = NULL;
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_get_eval,
.init = nft_meta_get_init,
.dump = nft_meta_get_dump,
+ .reduce = nft_meta_get_reduce,
.validate = nft_meta_get_validate,
.offload = nft_meta_get_offload,
};
+static bool nft_meta_set_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ int i;
+
+ for (i = 0; i < NFT_REG32_NUM; i++) {
+ if (!track->regs[i].selector)
+ continue;
+
+ if (track->regs[i].selector->ops != &nft_meta_get_ops)
+ continue;
+
+ track->regs[i].selector = NULL;
+ track->regs[i].bitwise = NULL;
+ }
+
+ return false;
+}
+
static const struct nft_expr_ops nft_meta_set_ops = {
.type = &nft_meta_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
@@ -767,6 +814,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
.init = nft_meta_set_init,
.destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
+ .reduce = nft_meta_set_reduce,
.validate = nft_meta_set_validate,
};
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 722cac1e90e0..1d378efd8823 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
struct nft_ng_inc {
u8 dreg;
u32 modulus;
- atomic_t counter;
+ atomic_t *counter;
u32 offset;
};
@@ -27,9 +27,9 @@ static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
u32 nval, oval;
do {
- oval = atomic_read(&priv->counter);
+ oval = atomic_read(priv->counter);
nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
- } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+ } while (atomic_cmpxchg(priv->counter, oval, nval) != oval);
return nval + priv->offset;
}
@@ -55,6 +55,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ng_inc *priv = nft_expr_priv(expr);
+ int err;
if (tb[NFTA_NG_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
@@ -66,10 +67,22 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- atomic_set(&priv->counter, priv->modulus - 1);
+ priv->counter = kmalloc(sizeof(*priv->counter), GFP_KERNEL);
+ if (!priv->counter)
+ return -ENOMEM;
- return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
- NULL, NFT_DATA_VALUE, sizeof(u32));
+ atomic_set(priv->counter, priv->modulus - 1);
+
+ err = nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
+ if (err < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(priv->counter);
+
+ return err;
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
@@ -98,6 +111,14 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
priv->offset);
}
+static void nft_ng_inc_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+ kfree(priv->counter);
+}
+
struct nft_ng_random {
u8 dreg;
u32 modulus;
@@ -157,6 +178,7 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
.eval = nft_ng_inc_eval,
.init = nft_ng_inc_init,
+ .destroy = nft_ng_inc_destroy,
.dump = nft_ng_inc_dump,
};
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index bd689938a2e0..940fed9a760b 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -157,7 +157,8 @@ void nft_payload_eval(const struct nft_expr *expr,
goto err;
break;
default:
- BUG();
+ WARN_ON_ONCE(1);
+ goto err;
}
offset += priv->offset;
@@ -209,6 +210,34 @@ nla_put_failure:
return -1;
}
+static bool nft_payload_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ const struct nft_payload *priv = nft_expr_priv(expr);
+ const struct nft_payload *payload;
+
+ if (!track->regs[priv->dreg].selector ||
+ track->regs[priv->dreg].selector->ops != expr->ops) {
+ track->regs[priv->dreg].selector = expr;
+ track->regs[priv->dreg].bitwise = NULL;
+ return false;
+ }
+
+ payload = nft_expr_priv(track->regs[priv->dreg].selector);
+ if (priv->base != payload->base ||
+ priv->offset != payload->offset ||
+ priv->len != payload->len) {
+ track->regs[priv->dreg].selector = expr;
+ track->regs[priv->dreg].bitwise = NULL;
+ return false;
+ }
+
+ if (!track->regs[priv->dreg].bitwise)
+ return true;
+
+ return nft_expr_reduce_bitwise(track, expr);
+}
+
static bool nft_payload_offload_mask(struct nft_offload_reg *reg,
u32 priv_len, u32 field_len)
{
@@ -512,6 +541,7 @@ static const struct nft_expr_ops nft_payload_ops = {
.eval = nft_payload_eval,
.init = nft_payload_init,
.dump = nft_payload_dump,
+ .reduce = nft_payload_reduce,
.offload = nft_payload_offload,
};
@@ -521,6 +551,7 @@ const struct nft_expr_ops nft_payload_fast_ops = {
.eval = nft_payload_eval,
.init = nft_payload_init,
.dump = nft_payload_dump,
+ .reduce = nft_payload_reduce,
.offload = nft_payload_offload,
};
@@ -546,6 +577,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
struct sk_buff *skb,
unsigned int *l4csum_offset)
{
+ if (pkt->fragoff)
+ return -1;
+
switch (pkt->tprot) {
case IPPROTO_TCP:
*l4csum_offset = offsetof(struct tcphdr, check);
@@ -664,7 +698,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr,
goto err;
break;
default:
- BUG();
+ WARN_ON_ONCE(1);
+ goto err;
}
csum_offset = offset + priv->csum_offset;
@@ -766,12 +801,33 @@ nla_put_failure:
return -1;
}
+static bool nft_payload_set_reduce(struct nft_regs_track *track,
+ const struct nft_expr *expr)
+{
+ int i;
+
+ for (i = 0; i < NFT_REG32_NUM; i++) {
+ if (!track->regs[i].selector)
+ continue;
+
+ if (track->regs[i].selector->ops != &nft_payload_ops &&
+ track->regs[i].selector->ops != &nft_payload_fast_ops)
+ continue;
+
+ track->regs[i].selector = NULL;
+ track->regs[i].bitwise = NULL;
+ }
+
+ return false;
+}
+
static const struct nft_expr_ops nft_payload_set_ops = {
.type = &nft_payload_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
.eval = nft_payload_set_eval,
.init = nft_payload_set_init,
.dump = nft_payload_set_dump,
+ .reduce = nft_payload_set_reduce,
};
static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index c4d1389f7185..0484aef74273 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -15,13 +15,13 @@
struct nft_quota {
atomic64_t quota;
unsigned long flags;
- atomic64_t consumed;
+ atomic64_t *consumed;
};
static inline bool nft_overquota(struct nft_quota *priv,
const struct sk_buff *skb)
{
- return atomic64_add_return(skb->len, &priv->consumed) >=
+ return atomic64_add_return(skb->len, priv->consumed) >=
atomic64_read(&priv->quota);
}
@@ -90,13 +90,23 @@ static int nft_quota_do_init(const struct nlattr * const tb[],
return -EOPNOTSUPP;
}
+ priv->consumed = kmalloc(sizeof(*priv->consumed), GFP_KERNEL);
+ if (!priv->consumed)
+ return -ENOMEM;
+
atomic64_set(&priv->quota, quota);
priv->flags = flags;
- atomic64_set(&priv->consumed, consumed);
+ atomic64_set(priv->consumed, consumed);
return 0;
}
+static void nft_quota_do_destroy(const struct nft_ctx *ctx,
+ struct nft_quota *priv)
+{
+ kfree(priv->consumed);
+}
+
static int nft_quota_obj_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
@@ -128,7 +138,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
* that we see, don't go over the quota boundary in what we send to
* userspace.
*/
- consumed = atomic64_read(&priv->consumed);
+ consumed = atomic64_read(priv->consumed);
quota = atomic64_read(&priv->quota);
if (consumed >= quota) {
consumed_cap = quota;
@@ -145,7 +155,7 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv,
goto nla_put_failure;
if (reset) {
- atomic64_sub(consumed, &priv->consumed);
+ atomic64_sub(consumed, priv->consumed);
clear_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags);
}
return 0;
@@ -162,11 +172,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj,
return nft_quota_do_dump(skb, priv, reset);
}
+static void nft_quota_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_quota *priv = nft_obj_data(obj);
+
+ return nft_quota_do_destroy(ctx, priv);
+}
+
static struct nft_object_type nft_quota_obj_type;
static const struct nft_object_ops nft_quota_obj_ops = {
.type = &nft_quota_obj_type,
.size = sizeof(struct nft_quota),
.init = nft_quota_obj_init,
+ .destroy = nft_quota_obj_destroy,
.eval = nft_quota_obj_eval,
.dump = nft_quota_obj_dump,
.update = nft_quota_obj_update,
@@ -205,12 +224,35 @@ static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
return nft_quota_do_dump(skb, priv, false);
}
+static void nft_quota_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_quota *priv = nft_expr_priv(expr);
+
+ return nft_quota_do_destroy(ctx, priv);
+}
+
+static int nft_quota_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_quota *priv_dst = nft_expr_priv(dst);
+
+ priv_dst->consumed = kmalloc(sizeof(*priv_dst->consumed), GFP_ATOMIC);
+ if (priv_dst->consumed)
+ return -ENOMEM;
+
+ atomic64_set(priv_dst->consumed, 0);
+
+ return 0;
+}
+
static struct nft_expr_type nft_quota_type;
static const struct nft_expr_ops nft_quota_ops = {
.type = &nft_quota_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_quota)),
.eval = nft_quota_eval,
.init = nft_quota_init,
+ .destroy = nft_quota_destroy,
+ .clone = nft_quota_clone,
.dump = nft_quota_dump,
};
diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c
index d89f68754f42..61cd8c4ac385 100644
--- a/net/netfilter/nft_reject_netdev.c
+++ b/net/netfilter/nft_reject_netdev.c
@@ -4,6 +4,7 @@
* Copyright (c) 2020 Jose M. Guisado <guigom@riseup.net>
*/
+#include <linux/etherdevice.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index dce866d93fee..2c8051d8cca6 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1290,6 +1290,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch_aligned)
goto out_scratch;
#endif
+ for_each_possible_cpu(i)
+ *per_cpu_ptr(new->scratch, i) = NULL;
+
+ if (pipapo_realloc_scratch(new, old->bsize_max))
+ goto out_scratch_realloc;
rcu_head_init(&new->rcu);
@@ -1334,6 +1339,9 @@ out_lt:
kvfree(dst->lt);
dst--;
}
+out_scratch_realloc:
+ for_each_possible_cpu(i)
+ kfree(*per_cpu_ptr(new->scratch, i));
#ifdef NFT_PIPAPO_ALIGN
free_percpu(new->scratch_aligned);
#endif
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 6f4116e72958..52e0d026d30a 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1048,11 +1048,9 @@ static int nft_pipapo_avx2_lookup_slow(unsigned long *map, unsigned long *fill,
struct nft_pipapo_field *f, int offset,
const u8 *pkt, bool first, bool last)
{
- unsigned long *lt = f->lt, bsize = f->bsize;
+ unsigned long bsize = f->bsize;
int i, ret = -1, b;
- lt += offset * NFT_PIPAPO_LONGS_PER_M256;
-
if (first)
memset(map, 0xff, bsize * sizeof(*map));
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 0a913ce07425..267757b0392a 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -24,7 +24,7 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
return XT_CONTINUE;
if (ct) {
- atomic_inc(&ct->ct_general.use);
+ refcount_inc(&ct->ct_general.use);
nf_ct_set(skb, ct, IP_CT_NEW);
} else {
nf_ct_set(skb, ct, IP_CT_UNTRACKED);
@@ -201,7 +201,6 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
goto err4;
}
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
- nf_conntrack_get(&ct->ct_general);
out:
info->ct = ct;
return 0;