aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-10-03 13:02:48 -0700
committerJakub Kicinski <kuba@kernel.org>2022-10-03 13:02:49 -0700
commita08d97a1935bee66b099b21feddad19c1fd90d0e (patch)
treeb11aaa5e616432f1012c7c738f12ab64dacef9b2 /net/netfilter
parentonce: add DO_ONCE_SLOW() for sleepable contexts (diff)
parentnet: netfilter: move bpf_ct_set_nat_info kfunc in nf_nat_bpf.c (diff)
downloadlinux-dev-a08d97a1935bee66b099b21feddad19c1fd90d0e.tar.xz
linux-dev-a08d97a1935bee66b099b21feddad19c1fd90d0e.zip
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2022-10-03 We've added 143 non-merge commits during the last 27 day(s) which contain a total of 151 files changed, 8321 insertions(+), 1402 deletions(-). The main changes are: 1) Add kfuncs for PKCS#7 signature verification from BPF programs, from Roberto Sassu. 2) Add support for struct-based arguments for trampoline based BPF programs, from Yonghong Song. 3) Fix entry IP for kprobe-multi and trampoline probes under IBT enabled, from Jiri Olsa. 4) Batch of improvements to veristat selftest tool in particular to add CSV output, a comparison mode for CSV outputs and filtering, from Andrii Nakryiko. 5) Add preparatory changes needed for the BPF core for upcoming BPF HID support, from Benjamin Tissoires. 6) Support for direct writes to nf_conn's mark field from tc and XDP BPF program types, from Daniel Xu. 7) Initial batch of documentation improvements for BPF insn set spec, from Dave Thaler. 8) Add a new BPF_MAP_TYPE_USER_RINGBUF map which provides single-user-space-producer / single-kernel-consumer semantics for BPF ring buffer, from David Vernet. 9) Follow-up fixes to BPF allocator under RT to always use raw spinlock for the BPF hashtab's bucket lock, from Hou Tao. 10) Allow creating an iterator that loops through only the resources of one task/thread instead of all, from Kui-Feng Lee. 11) Add support for kptrs in the per-CPU arraymap, from Kumar Kartikeya Dwivedi. 12) Add a new kfunc helper for nf to set src/dst NAT IP/port in a newly allocated CT entry which is not yet inserted, from Lorenzo Bianconi. 13) Remove invalid recursion check for struct_ops for TCP congestion control BPF programs, from Martin KaFai Lau. 14) Fix W^X issue with BPF trampoline and BPF dispatcher, from Song Liu. 15) Fix percpu_counter leakage in BPF hashtab allocation error path, from Tetsuo Handa. 16) Various cleanups in BPF selftests to use preferred ASSERT_* macros, from Wang Yufen. 17) Add invocation for cgroup/connect{4,6} BPF programs for ICMP pings, from YiFei Zhu. 18) Lift blinding decision under bpf_jit_harden = 1 to bpf_capable(), from Yauheni Kaliuta. 19) Various libbpf fixes and cleanups including a libbpf NULL pointer deref, from Xin Liu. * https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (143 commits) net: netfilter: move bpf_ct_set_nat_info kfunc in nf_nat_bpf.c Documentation: bpf: Add implementation notes documentations to table of contents bpf, docs: Delete misformatted table. selftests/xsk: Fix double free bpftool: Fix error message of strerror libbpf: Fix overrun in netlink attribute iteration selftests/bpf: Fix spelling mistake "unpriviledged" -> "unprivileged" samples/bpf: Fix typo in xdp_router_ipv4 sample bpftool: Remove unused struct event_ring_info bpftool: Remove unused struct btf_attach_point bpf, docs: Add TOC and fix formatting. bpf, docs: Add Clang note about BPF_ALU bpf, docs: Move Clang notes to a separate file bpf, docs: Linux byteswap note bpf, docs: Move legacy packet instructions to a separate file selftests/bpf: Check -EBUSY for the recurred bpf_setsockopt(TCP_CONGESTION) bpf: tcp: Stop bpf_setsockopt(TCP_CONGESTION) in init ops to recur itself bpf: Refactor bpf_setsockopt(TCP_CONGESTION) handling into another function bpf: Move the "cdg" tcp-cc check to the common sol_tcp_sockopt() bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline ... ==================== Link: https://lore.kernel.org/r/20221003194915.11847-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/Makefile6
-rw-r--r--net/netfilter/nf_conntrack_bpf.c74
-rw-r--r--net/netfilter/nf_conntrack_core.c1
-rw-r--r--net/netfilter/nf_nat_bpf.c79
-rw-r--r--net/netfilter/nf_nat_core.c4
5 files changed, 155 insertions, 9 deletions
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 06df49ea6329..0f060d100880 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -60,6 +60,12 @@ obj-$(CONFIG_NF_NAT) += nf_nat.o
nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
+ifeq ($(CONFIG_NF_NAT),m)
+nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o
+else ifeq ($(CONFIG_NF_NAT),y)
+nf_nat-$(CONFIG_DEBUG_INFO_BTF) += nf_nat_bpf.o
+endif
+
# NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 1cd87b28c9b0..8639e7efd0e2 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -6,12 +6,14 @@
* are exposed through to BPF programs is explicitly unstable.
*/
+#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
-#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -134,7 +136,6 @@ __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
memset(&ct->proto, 0, sizeof(ct->proto));
__nf_ct_set_timeout(ct, timeout * HZ);
- ct->status |= IPS_CONFIRMED;
out:
if (opts->netns_id >= 0)
@@ -184,14 +185,58 @@ static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
return ct;
}
+BTF_ID_LIST(btf_nf_conn_ids)
+BTF_ID(struct, nf_conn)
+BTF_ID(struct, nf_conn___init)
+
+/* Check writes into `struct nf_conn` */
+static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
+ const struct btf *btf,
+ const struct btf_type *t, int off,
+ int size, enum bpf_access_type atype,
+ u32 *next_btf_id,
+ enum bpf_type_flag *flag)
+{
+ const struct btf_type *ncit;
+ const struct btf_type *nct;
+ size_t end;
+
+ ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
+ nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);
+
+ if (t != nct && t != ncit) {
+ bpf_log(log, "only read is supported\n");
+ return -EACCES;
+ }
+
+ /* `struct nf_conn` and `struct nf_conn___init` have the same layout
+ * so we are safe to simply merge offset checks here
+ */
+ switch (off) {
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+ case offsetof(struct nf_conn, mark):
+ end = offsetofend(struct nf_conn, mark);
+ break;
+#endif
+ default:
+ bpf_log(log, "no write support to nf_conn at off %d\n", off);
+ return -EACCES;
+ }
+
+ if (off + size > end) {
+ bpf_log(log,
+ "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
+ off, size, end);
+ return -EACCES;
+ }
+
+ return 0;
+}
+
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in nf_conntrack BTF");
-struct nf_conn___init {
- struct nf_conn ct;
-};
-
/* bpf_xdp_ct_alloc - Allocate a new CT entry
*
* Parameters:
@@ -339,6 +384,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
+ nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) {
nf_conntrack_free(nfct);
@@ -449,5 +495,19 @@ int register_nf_conntrack_bpf(void)
int ret;
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
- return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
+ if (!ret) {
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
+ mutex_unlock(&nf_conn_btf_access_lock);
+ }
+
+ return ret;
+}
+
+void cleanup_nf_conntrack_bpf(void)
+{
+ mutex_lock(&nf_conn_btf_access_lock);
+ nfct_btf_struct_access = NULL;
+ mutex_unlock(&nf_conn_btf_access_lock);
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8208a28ea342..f97bda06d2a9 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2516,6 +2516,7 @@ static int kill_all(struct nf_conn *i, void *data)
void nf_conntrack_cleanup_start(void)
{
+ cleanup_nf_conntrack_bpf();
conntrack_gc_work.exiting = true;
}
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
new file mode 100644
index 000000000000..0fa5a0bbb0ff
--- /dev/null
+++ b/net/netfilter/nf_nat_bpf.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable NAT Helpers for XDP and TC-BPF hook
+ *
+ * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
+ * allowed to break compatibility for these functions since the interface they
+ * are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_nat.h>
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in nf_nat BTF");
+
+/* bpf_ct_set_nat_info - Set source or destination nat address
+ *
+ * Set source or destination nat address of the newly allocated
+ * nf_conn before insertion. This must be invoked for referenced
+ * PTR_TO_BTF_ID to nf_conn___init.
+ *
+ * Parameters:
+ * @nfct - Pointer to referenced nf_conn object, obtained using
+ * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
+ * @addr - Nat source/destination address
+ * @port - Nat source/destination port. Non-positive values are
+ * interpreted as select a random port.
+ * @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
+ */
+int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
+ union nf_inet_addr *addr, int port,
+ enum nf_nat_manip_type manip)
+{
+ struct nf_conn *ct = (struct nf_conn *)nfct;
+ u16 proto = nf_ct_l3num(ct);
+ struct nf_nat_range2 range;
+
+ if (proto != NFPROTO_IPV4 && proto != NFPROTO_IPV6)
+ return -EINVAL;
+
+ memset(&range, 0, sizeof(struct nf_nat_range2));
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = *addr;
+ range.max_addr = range.min_addr;
+ if (port > 0) {
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto.all = cpu_to_be16(port);
+ range.max_proto.all = range.min_proto.all;
+ }
+
+ return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
+}
+
+__diag_pop()
+
+BTF_SET8_START(nf_nat_kfunc_set)
+BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS)
+BTF_SET8_END(nf_nat_kfunc_set)
+
+static const struct btf_kfunc_id_set nf_bpf_nat_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &nf_nat_kfunc_set,
+};
+
+int register_nf_nat_bpf(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &nf_bpf_nat_kfunc_set);
+ if (ret)
+ return ret;
+
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
+ &nf_bpf_nat_kfunc_set);
+}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7981be526f26..d8e6380f6337 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -16,7 +16,7 @@
#include <linux/siphash.h>
#include <linux/rtnetlink.h>
-#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
@@ -1152,7 +1152,7 @@ static int __init nf_nat_init(void)
WARN_ON(nf_nat_hook != NULL);
RCU_INIT_POINTER(nf_nat_hook, &nat_hook);
- return 0;
+ return register_nf_nat_bpf();
}
static void __exit nf_nat_cleanup(void)