aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/bpf/index.rst10
-rw-r--r--Documentation/bpf/libbpf/index.rst (renamed from Documentation/bpf/libbpf/libbpf.rst)8
-rw-r--r--Documentation/bpf/libbpf/libbpf_api.rst27
-rw-r--r--Documentation/bpf/libbpf/libbpf_naming_convention.rst2
-rw-r--r--Documentation/networking/filter.rst4
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/media/rc/bpf-lirc.c6
-rw-r--r--drivers/net/ppp/ppp_generic.c8
-rw-r--r--drivers/net/team/team_mode_loadbalance.c2
-rw-r--r--include/linux/bpf-cgroup.h182
-rw-r--r--include/linux/bpf.h200
-rw-r--r--include/linux/bpf_types.h3
-rw-r--r--include/linux/bpfptr.h12
-rw-r--r--include/linux/btf_ids.h9
-rw-r--r--include/linux/filter.h66
-rw-r--r--include/linux/perf_event.h1
-rw-r--r--include/linux/trace_events.h7
-rw-r--r--include/net/af_unix.h8
-rw-r--r--include/uapi/linux/bpf.h34
-rw-r--r--kernel/bpf/bpf_iter.c2
-rw-r--r--kernel/bpf/bpf_struct_ops.c22
-rw-r--r--kernel/bpf/bpf_task_storage.c6
-rw-r--r--kernel/bpf/btf.c2
-rw-r--r--kernel/bpf/cgroup.c198
-rw-r--r--kernel/bpf/core.c33
-rw-r--r--kernel/bpf/helpers.c20
-rw-r--r--kernel/bpf/stackmap.c4
-rw-r--r--kernel/bpf/syscall.c167
-rw-r--r--kernel/bpf/task_iter.c11
-rw-r--r--kernel/bpf/trampoline.c2
-rw-r--r--kernel/bpf/verifier.c6
-rw-r--r--kernel/events/core.c77
-rw-r--r--kernel/trace/bpf_trace.c72
-rw-r--r--lib/test_bpf.c4
-rw-r--r--net/bpf/test_run.c6
-rw-r--r--net/core/filter.c38
-rw-r--r--net/core/ptp_classifier.c2
-rw-r--r--net/core/sock_map.c1
-rw-r--r--net/ipv4/af_inet.c6
-rw-r--r--net/ipv4/bpf_tcp_ca.c41
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/af_inet6.c6
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/netfilter/xt_bpf.c2
-rw-r--r--net/sched/act_bpf.c4
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/unix/af_unix.c189
-rw-r--r--net/unix/unix_bpf.c93
-rw-r--r--samples/bpf/Makefile109
-rw-r--r--samples/bpf/Makefile.target11
-rw-r--r--samples/bpf/cookie_uid_helper_example.c11
-rw-r--r--samples/bpf/offwaketime_kern.c9
-rw-r--r--samples/bpf/tracex4_user.c2
-rw-r--r--samples/bpf/xdp_monitor.bpf.c8
-rw-r--r--samples/bpf/xdp_monitor_kern.c257
-rw-r--r--samples/bpf/xdp_monitor_user.c798
-rw-r--r--samples/bpf/xdp_redirect.bpf.c49
-rw-r--r--samples/bpf/xdp_redirect_cpu.bpf.c (renamed from samples/bpf/xdp_redirect_cpu_kern.c)393
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c1106
-rw-r--r--samples/bpf/xdp_redirect_kern.c90
-rw-r--r--samples/bpf/xdp_redirect_map.bpf.c (renamed from samples/bpf/xdp_redirect_map_kern.c)89
-rw-r--r--samples/bpf/xdp_redirect_map_multi.bpf.c (renamed from samples/bpf/xdp_redirect_map_multi_kern.c)50
-rw-r--r--samples/bpf/xdp_redirect_map_multi_user.c345
-rw-r--r--samples/bpf/xdp_redirect_map_user.c385
-rw-r--r--samples/bpf/xdp_redirect_user.c270
-rw-r--r--samples/bpf/xdp_sample.bpf.c266
-rw-r--r--samples/bpf/xdp_sample.bpf.h141
-rw-r--r--samples/bpf/xdp_sample_shared.h17
-rw-r--r--samples/bpf/xdp_sample_user.c1673
-rw-r--r--samples/bpf/xdp_sample_user.h108
-rw-r--r--tools/include/uapi/linux/bpf.h34
-rw-r--r--tools/include/uapi/linux/ethtool.h53
-rw-r--r--tools/lib/bpf/Makefile10
-rw-r--r--tools/lib/bpf/bpf.c32
-rw-r--r--tools/lib/bpf/bpf.h8
-rw-r--r--tools/lib/bpf/libbpf.c229
-rw-r--r--tools/lib/bpf/libbpf.h75
-rw-r--r--tools/lib/bpf/libbpf.map3
-rw-r--r--tools/lib/bpf/libbpf_internal.h32
-rw-r--r--tools/testing/selftests/bpf/Makefile4
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h19
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c254
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c106
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_module.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_link.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c79
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_unix.c80
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h4
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c84
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c39
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c85
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_autosize.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_weak.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_link.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c29
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh10
-rw-r--r--tools/testing/selftests/bpf/test_maps.c18
-rw-r--r--tools/testing/selftests/bpf/test_progs.c107
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh10
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c87
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c681
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h63
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh30
126 files changed, 6813 insertions, 4027 deletions
diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index baea6c2abba5..1ceb5d704a97 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -15,15 +15,7 @@ that goes into great technical depth about the BPF Architecture.
libbpf
======
-Libbpf is a userspace library for loading and interacting with bpf programs.
-
-.. toctree::
- :maxdepth: 1
-
- libbpf/libbpf
- libbpf/libbpf_api
- libbpf/libbpf_build
- libbpf/libbpf_naming_convention
+Documentation/bpf/libbpf/libbpf.rst is a userspace library for loading and interacting with bpf programs.
BPF Type Format (BTF)
=====================
diff --git a/Documentation/bpf/libbpf/libbpf.rst b/Documentation/bpf/libbpf/index.rst
index 1b1e61d5ead1..4f8adfc3ab83 100644
--- a/Documentation/bpf/libbpf/libbpf.rst
+++ b/Documentation/bpf/libbpf/index.rst
@@ -3,6 +3,14 @@
libbpf
======
+For API documentation see the `versioned API documentation site <https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+.. toctree::
+ :maxdepth: 1
+
+ libbpf_naming_convention
+ libbpf_build
+
This is documentation for libbpf, a userspace library for loading and
interacting with bpf programs.
diff --git a/Documentation/bpf/libbpf/libbpf_api.rst b/Documentation/bpf/libbpf/libbpf_api.rst
deleted file mode 100644
index f07eecd054da..000000000000
--- a/Documentation/bpf/libbpf/libbpf_api.rst
+++ /dev/null
@@ -1,27 +0,0 @@
-.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-API
-===
-
-This documentation is autogenerated from header files in libbpf, tools/lib/bpf
-
-.. kernel-doc:: tools/lib/bpf/libbpf.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/btf.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/xsk.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_tracing.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_core_read.h
- :internal:
-
-.. kernel-doc:: tools/lib/bpf/bpf_endian.h
- :internal: \ No newline at end of file
diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
index 6bf9c5ac7576..9c68d5014ff1 100644
--- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst
+++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
@@ -69,7 +69,7 @@ functions. These can be mixed and matched. Note that these functions
are not reentrant for performance reasons.
ABI
-==========
+---
libbpf can be both linked statically or used as DSO. To avoid possible
conflicts with other libraries an application is linked with, all
diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
index 5f13905b12e0..ce2b8e8bb9ab 100644
--- a/Documentation/networking/filter.rst
+++ b/Documentation/networking/filter.rst
@@ -638,8 +638,8 @@ extension, PTP dissector/classifier, and much more. They are all internally
converted by the kernel into the new instruction set representation and run
in the eBPF interpreter. For in-kernel handlers, this all works transparently
by using bpf_prog_create() for setting up the filter, resp.
-bpf_prog_destroy() for destroying it. The macro
-BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed
+bpf_prog_destroy() for destroying it. The function
+bpf_prog_run(filter, ctx) transparently invokes eBPF interpreter or JITed
code to run the filter. 'filter' is a pointer to struct bpf_prog that we
got from bpf_prog_create(), and 'ctx' the given context (e.g.
skb pointer). All constraints and restrictions from bpf_check_classic() apply
diff --git a/MAINTAINERS b/MAINTAINERS
index 6abfd3e36c31..2f12abc1b739 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3409,7 +3409,6 @@ F: drivers/net/ethernet/netronome/nfp/bpf/
BPF JIT for POWERPC (32-BIT AND 64-BIT)
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
-M: Sandipan Das <sandipan@linux.ibm.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index afae0afe3f81..3eff08d7b8e5 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -160,7 +160,7 @@ static int lirc_bpf_attach(struct rc_dev *rcdev, struct bpf_prog *prog)
goto unlock;
}
- ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
+ ret = bpf_prog_array_copy(old_array, NULL, prog, 0, &new_array);
if (ret < 0)
goto unlock;
@@ -193,7 +193,7 @@ static int lirc_bpf_detach(struct rc_dev *rcdev, struct bpf_prog *prog)
}
old_array = lirc_rcu_dereference(raw->progs);
- ret = bpf_prog_array_copy(old_array, prog, NULL, &new_array);
+ ret = bpf_prog_array_copy(old_array, prog, NULL, 0, &new_array);
/*
* Do not use bpf_prog_array_delete_safe() as we would end up
* with a dummy entry in the array, and the we would free the
@@ -217,7 +217,7 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
raw->bpf_sample = sample;
if (raw->progs)
- BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, BPF_PROG_RUN);
+ BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run);
}
/*
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index e9e81573f21e..fb52cd175b45 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1744,7 +1744,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
a four-byte PPP header on each packet */
*(u8 *)skb_push(skb, 2) = 1;
if (ppp->pass_filter &&
- BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
+ bpf_prog_run(ppp->pass_filter, skb) == 0) {
if (ppp->debug & 1)
netdev_printk(KERN_DEBUG, ppp->dev,
"PPP: outbound frame "
@@ -1754,7 +1754,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
}
/* if this packet passes the active filter, record the time */
if (!(ppp->active_filter &&
- BPF_PROG_RUN(ppp->active_filter, skb) == 0))
+ bpf_prog_run(ppp->active_filter, skb) == 0))
ppp->last_xmit = jiffies;
skb_pull(skb, 2);
#else
@@ -2468,7 +2468,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
*(u8 *)skb_push(skb, 2) = 0;
if (ppp->pass_filter &&
- BPF_PROG_RUN(ppp->pass_filter, skb) == 0) {
+ bpf_prog_run(ppp->pass_filter, skb) == 0) {
if (ppp->debug & 1)
netdev_printk(KERN_DEBUG, ppp->dev,
"PPP: inbound frame "
@@ -2477,7 +2477,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
return;
}
if (!(ppp->active_filter &&
- BPF_PROG_RUN(ppp->active_filter, skb) == 0))
+ bpf_prog_run(ppp->active_filter, skb) == 0))
ppp->last_recv = jiffies;
__skb_pull(skb, 2);
} else
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 32aef8ac4a14..b095a4b4957b 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -197,7 +197,7 @@ static unsigned int lb_get_skb_hash(struct lb_priv *lb_priv,
fp = rcu_dereference_bh(lb_priv->fp);
if (unlikely(!fp))
return 0;
- lhash = BPF_PROG_RUN(fp, skb);
+ lhash = bpf_prog_run(fp, skb);
c = (char *) &lhash;
return c[0] ^ c[1] ^ c[2] ^ c[3];
}
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a74cd1c3bd87..2746fd804216 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,9 +23,73 @@ struct ctl_table_header;
struct task_struct;
#ifdef CONFIG_CGROUP_BPF
+enum cgroup_bpf_attach_type {
+ CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
+ CGROUP_INET_INGRESS = 0,
+ CGROUP_INET_EGRESS,
+ CGROUP_INET_SOCK_CREATE,
+ CGROUP_SOCK_OPS,
+ CGROUP_DEVICE,
+ CGROUP_INET4_BIND,
+ CGROUP_INET6_BIND,
+ CGROUP_INET4_CONNECT,
+ CGROUP_INET6_CONNECT,
+ CGROUP_INET4_POST_BIND,
+ CGROUP_INET6_POST_BIND,
+ CGROUP_UDP4_SENDMSG,
+ CGROUP_UDP6_SENDMSG,
+ CGROUP_SYSCTL,
+ CGROUP_UDP4_RECVMSG,
+ CGROUP_UDP6_RECVMSG,
+ CGROUP_GETSOCKOPT,
+ CGROUP_SETSOCKOPT,
+ CGROUP_INET4_GETPEERNAME,
+ CGROUP_INET6_GETPEERNAME,
+ CGROUP_INET4_GETSOCKNAME,
+ CGROUP_INET6_GETSOCKNAME,
+ CGROUP_INET_SOCK_RELEASE,
+ MAX_CGROUP_BPF_ATTACH_TYPE
+};
+
+#define CGROUP_ATYPE(type) \
+ case BPF_##type: return type
+
+static inline enum cgroup_bpf_attach_type
+to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type)
+{
+ switch (attach_type) {
+ CGROUP_ATYPE(CGROUP_INET_INGRESS);
+ CGROUP_ATYPE(CGROUP_INET_EGRESS);
+ CGROUP_ATYPE(CGROUP_INET_SOCK_CREATE);
+ CGROUP_ATYPE(CGROUP_SOCK_OPS);
+ CGROUP_ATYPE(CGROUP_DEVICE);
+ CGROUP_ATYPE(CGROUP_INET4_BIND);
+ CGROUP_ATYPE(CGROUP_INET6_BIND);
+ CGROUP_ATYPE(CGROUP_INET4_CONNECT);
+ CGROUP_ATYPE(CGROUP_INET6_CONNECT);
+ CGROUP_ATYPE(CGROUP_INET4_POST_BIND);
+ CGROUP_ATYPE(CGROUP_INET6_POST_BIND);
+ CGROUP_ATYPE(CGROUP_UDP4_SENDMSG);
+ CGROUP_ATYPE(CGROUP_UDP6_SENDMSG);
+ CGROUP_ATYPE(CGROUP_SYSCTL);
+ CGROUP_ATYPE(CGROUP_UDP4_RECVMSG);
+ CGROUP_ATYPE(CGROUP_UDP6_RECVMSG);
+ CGROUP_ATYPE(CGROUP_GETSOCKOPT);
+ CGROUP_ATYPE(CGROUP_SETSOCKOPT);
+ CGROUP_ATYPE(CGROUP_INET4_GETPEERNAME);
+ CGROUP_ATYPE(CGROUP_INET6_GETPEERNAME);
+ CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME);
+ CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME);
+ CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE);
+ default:
+ return CGROUP_BPF_ATTACH_TYPE_INVALID;
+ }
+}
+
+#undef CGROUP_ATYPE
-extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
-#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
+extern struct static_key_false cgroup_bpf_enabled_key[MAX_CGROUP_BPF_ATTACH_TYPE];
+#define cgroup_bpf_enabled(atype) static_branch_unlikely(&cgroup_bpf_enabled_key[atype])
#define for_each_cgroup_storage_type(stype) \
for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -67,15 +131,15 @@ struct bpf_prog_array;
struct cgroup_bpf {
/* array of effective progs in this cgroup */
- struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
+ struct bpf_prog_array __rcu *effective[MAX_CGROUP_BPF_ATTACH_TYPE];
/* attached progs to this cgroup and attach flags
* when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
* have either zero or one element
* when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
*/
- struct list_head progs[MAX_BPF_ATTACH_TYPE];
- u32 flags[MAX_BPF_ATTACH_TYPE];
+ struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+ u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
/* list of cgroup shared storages */
struct list_head storages;
@@ -115,28 +179,28 @@ int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_sk(struct sock *sk,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
- short access, enum bpf_attach_type type);
+ short access, enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
char **buf, size_t *pcount, loff_t *ppos,
- enum bpf_attach_type type);
+ enum cgroup_bpf_attach_type atype);
int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
int *optname, char __user *optval,
@@ -179,9 +243,9 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) \
+ if (cgroup_bpf_enabled(CGROUP_INET_INGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
- BPF_CGROUP_INET_INGRESS); \
+ CGROUP_INET_INGRESS); \
\
__ret; \
})
@@ -189,54 +253,54 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
+ if (cgroup_bpf_enabled(CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
if (sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
- BPF_CGROUP_INET_EGRESS); \
+ CGROUP_INET_EGRESS); \
} \
__ret; \
})
-#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
+#define BPF_CGROUP_RUN_SK_PROG(sk, atype) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) { \
- __ret = __cgroup_bpf_run_filter_sk(sk, type); \
+ if (cgroup_bpf_enabled(atype)) { \
+ __ret = __cgroup_bpf_run_filter_sk(sk, atype); \
} \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_CREATE)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_CREATE)
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET_SOCK_RELEASE)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET_SOCK_RELEASE)
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET4_POST_BIND)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET4_POST_BIND)
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) \
- BPF_CGROUP_RUN_SK_PROG(sk, BPF_CGROUP_INET6_POST_BIND)
+ BPF_CGROUP_RUN_SK_PROG(sk, CGROUP_INET6_POST_BIND)
-#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) \
({ \
u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ if (cgroup_bpf_enabled(atype)) \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
NULL, \
&__unused_flags); \
__ret; \
})
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) \
({ \
u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) { \
+ if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
t_ctx, \
&__unused_flags); \
release_sock(sk); \
@@ -249,13 +313,13 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
* (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
* should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
*/
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, bind_flags) \
({ \
u32 __flags = 0; \
int __ret = 0; \
- if (cgroup_bpf_enabled(type)) { \
+ if (cgroup_bpf_enabled(atype)) { \
lock_sock(sk); \
- __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, atype, \
NULL, &__flags); \
release_sock(sk); \
if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
@@ -265,33 +329,33 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
})
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
- ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
- cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \
+ ((cgroup_bpf_enabled(CGROUP_INET4_CONNECT) || \
+ cgroup_bpf_enabled(CGROUP_INET6_CONNECT)) && \
(sk)->sk_prot->pre_connect)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET4_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET6_CONNECT)
+ BPF_CGROUP_RUN_SA_PROG(sk, uaddr, CGROUP_INET6_CONNECT)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_CONNECT, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET4_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_CONNECT, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_INET6_CONNECT, NULL)
#define BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_SENDMSG, t_ctx)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_SENDMSG, t_ctx)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_SENDMSG, t_ctx)
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP4_RECVMSG, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP4_RECVMSG, NULL)
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_UDP6_RECVMSG, NULL)
+ BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, CGROUP_UDP6_RECVMSG, NULL)
/* The SOCK_OPS"_SK" macro should be used when sock_ops->sk is not a
* fullsock and its parent fullsock cannot be traced by
@@ -311,33 +375,33 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) \
+ if (cgroup_bpf_enabled(CGROUP_SOCK_OPS)) \
__ret = __cgroup_bpf_run_filter_sock_ops(sk, \
sock_ops, \
- BPF_CGROUP_SOCK_OPS); \
+ CGROUP_SOCK_OPS); \
__ret; \
})
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
+ if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
if (__sk && sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
sock_ops, \
- BPF_CGROUP_SOCK_OPS); \
+ CGROUP_SOCK_OPS); \
} \
__ret; \
})
-#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) \
- __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
+ if (cgroup_bpf_enabled(CGROUP_DEVICE)) \
+ __ret = __cgroup_bpf_check_dev_permission(atype, major, minor, \
access, \
- BPF_CGROUP_DEVICE); \
+ CGROUP_DEVICE); \
\
__ret; \
})
@@ -346,10 +410,10 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) \
+ if (cgroup_bpf_enabled(CGROUP_SYSCTL)) \
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
buf, count, pos, \
- BPF_CGROUP_SYSCTL); \
+ CGROUP_SYSCTL); \
__ret; \
})
@@ -357,7 +421,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
kernel_optval) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_SETSOCKOPT)) \
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
optname, optval, \
optlen, \
@@ -368,7 +432,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
get_user(__ret, optlen); \
__ret; \
})
@@ -377,7 +441,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
max_optlen, retval) \
({ \
int __ret = retval; \
- if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
!INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
tcp_bpf_bypass_getsockopt, \
@@ -392,7 +456,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
optlen, retval) \
({ \
int __ret = retval; \
- if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \
__ret = __cgroup_bpf_run_filter_getsockopt_kern( \
sock, level, optname, optval, optlen, retval); \
__ret; \
@@ -451,14 +515,14 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
return 0;
}
-#define cgroup_bpf_enabled(type) (0)
-#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
+#define cgroup_bpf_enabled(atype) (0)
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, atype, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
@@ -470,7 +534,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; })
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c8cc09013210..f4c16f19f83e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1103,7 +1103,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
/* an array of programs to be executed under rcu_lock.
*
* Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
+ * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
*
* the structure returned by bpf_prog_array_alloc() should be populated
* with program pointers and the last pointer must be NULL.
@@ -1114,7 +1114,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
*/
struct bpf_prog_array_item {
struct bpf_prog *prog;
- struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+ union {
+ struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
+ u64 bpf_cookie;
+ };
};
struct bpf_prog_array {
@@ -1140,73 +1143,133 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array,
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
+ u64 bpf_cookie,
struct bpf_prog_array **new_array);
struct bpf_run_ctx {};
struct bpf_cg_run_ctx {
struct bpf_run_ctx run_ctx;
- struct bpf_prog_array_item *prog_item;
+ const struct bpf_prog_array_item *prog_item;
+};
+
+struct bpf_trace_run_ctx {
+ struct bpf_run_ctx run_ctx;
+ u64 bpf_cookie;
};
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+ struct bpf_run_ctx *old_ctx = NULL;
+
+#ifdef CONFIG_BPF_SYSCALL
+ old_ctx = current->bpf_ctx;
+ current->bpf_ctx = new_ctx;
+#endif
+ return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+#ifdef CONFIG_BPF_SYSCALL
+ current->bpf_ctx = old_ctx;
+#endif
+}
+
/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
/* BPF program asks to set CN on the packet. */
#define BPF_RET_SET_CN (1 << 0)
-#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
- ({ \
- struct bpf_prog_array_item *_item; \
- struct bpf_prog *_prog; \
- struct bpf_prog_array *_array; \
- struct bpf_run_ctx *old_run_ctx; \
- struct bpf_cg_run_ctx run_ctx; \
- u32 _ret = 1; \
- u32 func_ret; \
- migrate_disable(); \
- rcu_read_lock(); \
- _array = rcu_dereference(array); \
- _item = &_array->items[0]; \
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); \
- while ((_prog = READ_ONCE(_item->prog))) { \
- run_ctx.prog_item = _item; \
- func_ret = func(_prog, ctx); \
- _ret &= (func_ret & 1); \
- *(ret_flags) |= (func_ret >> 1); \
- _item++; \
- } \
- bpf_reset_run_ctx(old_run_ctx); \
- rcu_read_unlock(); \
- migrate_enable(); \
- _ret; \
- })
-
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \
- ({ \
- struct bpf_prog_array_item *_item; \
- struct bpf_prog *_prog; \
- struct bpf_prog_array *_array; \
- struct bpf_run_ctx *old_run_ctx; \
- struct bpf_cg_run_ctx run_ctx; \
- u32 _ret = 1; \
- migrate_disable(); \
- rcu_read_lock(); \
- _array = rcu_dereference(array); \
- if (unlikely(check_non_null && !_array))\
- goto _out; \
- _item = &_array->items[0]; \
- old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
- while ((_prog = READ_ONCE(_item->prog))) { \
- run_ctx.prog_item = _item; \
- _ret &= func(_prog, ctx); \
- _item++; \
- } \
- bpf_reset_run_ctx(old_run_ctx); \
-_out: \
- rcu_read_unlock(); \
- migrate_enable(); \
- _ret; \
- })
+typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog,
+ u32 *ret_flags)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+ u32 ret = 1;
+ u32 func_ret;
+
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(array_rcu);
+ item = &array->items[0];
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ while ((prog = READ_ONCE(item->prog))) {
+ run_ctx.prog_item = item;
+ func_ret = run_prog(prog, ctx);
+ ret &= (func_ret & 1);
+ *(ret_flags) |= (func_ret >> 1);
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ rcu_read_unlock();
+ migrate_enable();
+ return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_cg_run_ctx run_ctx;
+ u32 ret = 1;
+
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(array_rcu);
+ item = &array->items[0];
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ while ((prog = READ_ONCE(item->prog))) {
+ run_ctx.prog_item = item;
+ ret &= run_prog(prog, ctx);
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+ rcu_read_unlock();
+ migrate_enable();
+ return ret;
+}
+
+static __always_inline u32
+BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+ const void *ctx, bpf_prog_run_fn run_prog)
+{
+ const struct bpf_prog_array_item *item;
+ const struct bpf_prog *prog;
+ const struct bpf_prog_array *array;
+ struct bpf_run_ctx *old_run_ctx;
+ struct bpf_trace_run_ctx run_ctx;
+ u32 ret = 1;
+
+ migrate_disable();
+ rcu_read_lock();
+ array = rcu_dereference(array_rcu);
+ if (unlikely(!array))
+ goto out;
+ old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+ item = &array->items[0];
+ while ((prog = READ_ONCE(item->prog))) {
+ run_ctx.bpf_cookie = item->bpf_cookie;
+ ret &= run_prog(prog, ctx);
+ item++;
+ }
+ bpf_reset_run_ctx(old_run_ctx);
+out:
+ rcu_read_unlock();
+ migrate_enable();
+ return ret;
+}
/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
* so BPF programs can request cwr for TCP packets.
@@ -1235,7 +1298,7 @@ _out: \
u32 _flags = 0; \
bool _cn; \
u32 _ret; \
- _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+ _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
_cn = _flags & BPF_RET_SET_CN; \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
@@ -1244,12 +1307,6 @@ _out: \
_ret; \
})
-#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
- __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
-
-#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \
- __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
-
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
@@ -1284,20 +1341,6 @@ static inline void bpf_enable_instrumentation(void)
migrate_enable();
}
-static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
-{
- struct bpf_run_ctx *old_ctx;
-
- old_ctx = current->bpf_ctx;
- current->bpf_ctx = new_ctx;
- return old_ctx;
-}
-
-static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
-{
- current->bpf_ctx = old_ctx;
-}
-
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
extern const struct file_operations bpf_iter_fops;
@@ -2059,9 +2102,6 @@ extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
-const struct bpf_func_proto *bpf_tracing_func_proto(
- enum bpf_func_id func_id, const struct bpf_prog *prog);
-
const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index ae3ac3a2018c..9c81724e4b98 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -136,3 +136,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
#endif
+#ifdef CONFIG_PERF_EVENTS
+BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
+#endif
diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index 5cdeab497cb3..546e27fc6d46 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -62,9 +62,17 @@ static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
}
-static inline void *memdup_bpfptr(bpfptr_t src, size_t len)
+static inline void *kvmemdup_bpfptr(bpfptr_t src, size_t len)
{
- return memdup_sockptr((sockptr_t) src, len);
+ void *p = kvmalloc(len, GFP_USER | __GFP_NOWARN);
+
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+ if (copy_from_bpfptr(p, src, len)) {
+ kvfree(p);
+ return ERR_PTR(-EFAULT);
+ }
+ return p;
}
static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 57890b357f85..47d9abfbdb55 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl)
#define BTF_ID_LIST_SINGLE(name, prefix, typename) \
BTF_ID_LIST(name) \
BTF_ID(prefix, typename)
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \
+ BTF_ID_LIST_GLOBAL(name) \
+ BTF_ID(prefix, typename)
/*
* The BTF_ID_UNUSED macro defines 4 zero bytes.
@@ -148,6 +151,7 @@ extern struct btf_id_set name;
#define BTF_ID_UNUSED
#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1];
#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
#define BTF_SET_END(name)
@@ -172,7 +176,8 @@ extern struct btf_id_set name;
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \
BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \
- BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \
+ BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
enum {
#define BTF_SOCK_TYPE(name, str) name,
@@ -184,4 +189,6 @@ MAX_BTF_SOCK_TYPE,
extern u32 btf_sock_ids[];
#endif
+extern u32 btf_task_struct_ids[];
+
#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1797e8506929..7d248941ecea 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -600,25 +600,38 @@ struct sk_filter {
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
-#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
- u32 __ret; \
- cant_migrate(); \
- if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
- struct bpf_prog_stats *__stats; \
- u64 __start = sched_clock(); \
- __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
- __stats = this_cpu_ptr(prog->stats); \
- u64_stats_update_begin(&__stats->syncp); \
- __stats->cnt++; \
- __stats->nsecs += sched_clock() - __start; \
- u64_stats_update_end(&__stats->syncp); \
- } else { \
- __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
- } \
- __ret; })
-
-#define BPF_PROG_RUN(prog, ctx) \
- __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func)
+typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
+ const struct bpf_insn *insnsi,
+ unsigned int (*bpf_func)(const void *,
+ const struct bpf_insn *));
+
+static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
+ const void *ctx,
+ bpf_dispatcher_fn dfunc)
+{
+ u32 ret;
+
+ cant_migrate();
+ if (static_branch_unlikely(&bpf_stats_enabled_key)) {
+ struct bpf_prog_stats *stats;
+ u64 start = sched_clock();
+
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ stats = this_cpu_ptr(prog->stats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->cnt++;
+ stats->nsecs += sched_clock() - start;
+ u64_stats_update_end(&stats->syncp);
+ } else {
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ }
+ return ret;
+}
+
+static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void *ctx)
+{
+ return __bpf_prog_run(prog, ctx, bpf_dispatcher_nop_func);
+}
/*
* Use in preemptible and therefore migratable context to make sure that
@@ -637,7 +650,7 @@ static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
u32 ret;
migrate_disable();
- ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func);
+ ret = bpf_prog_run(prog, ctx);
migrate_enable();
return ret;
}
@@ -710,7 +723,7 @@ static inline void bpf_restore_data_end(
cb->data_end = saved_data_end;
}
-static inline u8 *bpf_skb_cb(struct sk_buff *skb)
+static inline u8 *bpf_skb_cb(const struct sk_buff *skb)
{
/* eBPF programs may read/write skb->cb[] area to transfer meta
* data between tail calls. Since this also needs to work with
@@ -731,8 +744,9 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
/* Must be invoked with migration disabled */
static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
- struct sk_buff *skb)
+ const void *ctx)
{
+ const struct sk_buff *skb = ctx;
u8 *cb_data = bpf_skb_cb(skb);
u8 cb_saved[BPF_SKB_CB_LEN];
u32 res;
@@ -742,7 +756,7 @@ static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
memset(cb_data, 0, sizeof(cb_saved));
}
- res = BPF_PROG_RUN(prog, skb);
+ res = bpf_prog_run(prog, skb);
if (unlikely(prog->cb_access))
memcpy(cb_data, cb_saved, sizeof(cb_saved));
@@ -787,7 +801,7 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* under local_bh_disable(), which provides the needed RCU protection
* for accessing map entries.
*/
- u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+ u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
@@ -1440,7 +1454,7 @@ static inline bool bpf_sk_lookup_run_v4(struct net *net, int protocol,
};
u32 act;
- act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+ act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run);
if (act == SK_PASS) {
selected_sk = ctx.selected_sk;
no_reuseport = ctx.no_reuseport;
@@ -1478,7 +1492,7 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
};
u32 act;
- act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, BPF_PROG_RUN);
+ act = BPF_PROG_SK_LOOKUP_RUN_ARRAY(run_array, ctx, bpf_prog_run);
if (act == SK_PASS) {
selected_sk = ctx.selected_sk;
no_reuseport = ctx.no_reuseport;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2d510ad750ed..fe156a8170aa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -762,6 +762,7 @@ struct perf_event {
#ifdef CONFIG_BPF_SYSCALL
perf_overflow_handler_t orig_overflow_handler;
struct bpf_prog *prog;
+ u64 bpf_cookie;
#endif
#ifdef CONFIG_EVENT_TRACING
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index ad413b382a3c..8e0631a4b046 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -675,7 +675,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
#ifdef CONFIG_BPF_EVENTS
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
-int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
+int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
void perf_event_detach_bpf_prog(struct perf_event *event);
int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
@@ -692,7 +692,7 @@ static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *c
}
static inline int
-perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog)
+perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie)
{
return -EOPNOTSUPP;
}
@@ -803,6 +803,9 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog, u64 bpf_cookie);
+void perf_event_free_bpf_prog(struct perf_event *event);
+
void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 4757d7f53f13..7d142e8a0550 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -87,6 +87,8 @@ long unix_outq_len(struct sock *sk);
int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
int flags);
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
+ int flags);
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
@@ -96,9 +98,11 @@ static inline void unix_sysctl_unregister(struct net *net) {}
#endif
#ifdef CONFIG_BPF_SYSCALL
-extern struct proto unix_proto;
+extern struct proto unix_dgram_proto;
+extern struct proto unix_stream_proto;
-int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init unix_bpf_build_proto(void);
#else
static inline void __init unix_bpf_build_proto(void)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2db6925e04f4..791f31dd0abe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
- __u32 attach_type; /* program attach type */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
union bpf_iter_link_info {
@@ -993,6 +993,7 @@ enum bpf_attach_type {
BPF_SK_SKB_VERDICT,
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+ BPF_PERF_EVENT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1006,6 +1007,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
+ BPF_LINK_TYPE_PERF_EVENT = 7,
MAX_BPF_LINK_TYPE,
};
@@ -1446,6 +1448,13 @@ union bpf_attr {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
};
+ struct {
+ /* black box user-provided value passed through
+ * to BPF program at the execution time and
+ * accessible through bpf_get_attach_cookie() BPF helper
+ */
+ __u64 bpf_cookie;
+ } perf_event;
};
} link_create;
@@ -4847,6 +4856,27 @@ union bpf_attr {
* Get address of the traced function (for tracing and kprobe programs).
* Return
* Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ * Description
+ * Get bpf_cookie value provided (optionally) during the program
+ * attachment. It might be different for each individual
+ * attachment, even if BPF program itself is the same.
+ * Expects BPF program context *ctx* as a first argument.
+ *
+ * Supported for the following program types:
+ * - kprobe/uprobe;
+ * - tracepoint;
+ * - perf_event.
+ * Return
+ * Value specified by user at BPF link creation/attachment time
+ * or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ * Description
+ * Get the struct pt_regs associated with **task**.
+ * Return
+ * A pointer to struct pt_regs.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5023,6 +5053,8 @@ union bpf_attr {
FN(timer_start), \
FN(timer_cancel), \
FN(get_func_ip), \
+ FN(get_attach_cookie), \
+ FN(task_pt_regs), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 2e9d47bb40ff..b2ee45064e06 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -686,7 +686,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
rcu_read_lock();
migrate_disable();
- ret = BPF_PROG_RUN(prog, ctx);
+ ret = bpf_prog_run(prog, ctx);
migrate_enable();
rcu_read_unlock();
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 70f6fd4fa305..d6731c32864e 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -28,6 +28,7 @@ struct bpf_struct_ops_value {
struct bpf_struct_ops_map {
struct bpf_map map;
+ struct rcu_head rcu;
const struct bpf_struct_ops *st_ops;
/* protect map_update */
struct mutex lock;
@@ -622,6 +623,14 @@ bool bpf_struct_ops_get(const void *kdata)
return refcount_inc_not_zero(&kvalue->refcnt);
}
+static void bpf_struct_ops_put_rcu(struct rcu_head *head)
+{
+ struct bpf_struct_ops_map *st_map;
+
+ st_map = container_of(head, struct bpf_struct_ops_map, rcu);
+ bpf_map_put(&st_map->map);
+}
+
void bpf_struct_ops_put(const void *kdata)
{
struct bpf_struct_ops_value *kvalue;
@@ -632,6 +641,17 @@ void bpf_struct_ops_put(const void *kdata)
st_map = container_of(kvalue, struct bpf_struct_ops_map,
kvalue);
- bpf_map_put(&st_map->map);
+ /* The struct_ops's function may switch to another struct_ops.
+ *
+ * For example, bpf_tcp_cc_x->init() may switch to
+ * another tcp_cc_y by calling
+ * setsockopt(TCP_CONGESTION, "tcp_cc_y").
+ * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
+ * and its map->refcnt may reach 0 which then free its
+ * trampoline image while tcp_cc_x is still running.
+ *
+ * Thus, a rcu grace period is needed here.
+ */
+ call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
}
}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 3ce75758d394..ebfa8bc90892 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -317,15 +317,13 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_owner_storage_ptr = task_storage_ptr,
};
-BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
-
const struct bpf_func_proto bpf_task_storage_get_proto = {
.func = bpf_task_storage_get,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
- .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+ .arg2_btf_id = &btf_task_struct_ids[0],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
@@ -336,5 +334,5 @@ const struct bpf_func_proto bpf_task_storage_delete_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
- .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+ .arg2_btf_id = &btf_task_struct_ids[0],
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c395024610ed..dfe61df4f974 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6213,3 +6213,5 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
.arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
};
+
+BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index b567ca46555c..03145d45e3d5 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -19,7 +19,7 @@
#include "../cgroup/cgroup-internal.h"
-DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_BPF_ATTACH_TYPE);
+DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
void cgroup_bpf_offline(struct cgroup *cgrp)
@@ -113,12 +113,12 @@ static void cgroup_bpf_release(struct work_struct *work)
struct list_head *storages = &cgrp->bpf.storages;
struct bpf_cgroup_storage *storage, *stmp;
- unsigned int type;
+ unsigned int atype;
mutex_lock(&cgroup_mutex);
- for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
- struct list_head *progs = &cgrp->bpf.progs[type];
+ for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
+ struct list_head *progs = &cgrp->bpf.progs[atype];
struct bpf_prog_list *pl, *pltmp;
list_for_each_entry_safe(pl, pltmp, progs, node) {
@@ -128,10 +128,10 @@ static void cgroup_bpf_release(struct work_struct *work)
if (pl->link)
bpf_cgroup_link_auto_detach(pl->link);
kfree(pl);
- static_branch_dec(&cgroup_bpf_enabled_key[type]);
+ static_branch_dec(&cgroup_bpf_enabled_key[atype]);
}
old_array = rcu_dereference_protected(
- cgrp->bpf.effective[type],
+ cgrp->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
bpf_prog_array_free(old_array);
}
@@ -196,7 +196,7 @@ static u32 prog_list_length(struct list_head *head)
* if parent has overridable or multi-prog, allow attaching
*/
static bool hierarchy_allows_attach(struct cgroup *cgrp,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup *p;
@@ -204,12 +204,12 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
if (!p)
return true;
do {
- u32 flags = p->bpf.flags[type];
+ u32 flags = p->bpf.flags[atype];
u32 cnt;
if (flags & BPF_F_ALLOW_MULTI)
return true;
- cnt = prog_list_length(&p->bpf.progs[type]);
+ cnt = prog_list_length(&p->bpf.progs[atype]);
WARN_ON_ONCE(cnt > 1);
if (cnt == 1)
return !!(flags & BPF_F_ALLOW_OVERRIDE);
@@ -225,7 +225,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp,
* to programs in this cgroup
*/
static int compute_effective_progs(struct cgroup *cgrp,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
struct bpf_prog_array **array)
{
struct bpf_prog_array_item *item;
@@ -236,8 +236,8 @@ static int compute_effective_progs(struct cgroup *cgrp,
/* count number of effective programs by walking parents */
do {
- if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
- cnt += prog_list_length(&p->bpf.progs[type]);
+ if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
+ cnt += prog_list_length(&p->bpf.progs[atype]);
p = cgroup_parent(p);
} while (p);
@@ -249,10 +249,10 @@ static int compute_effective_progs(struct cgroup *cgrp,
cnt = 0;
p = cgrp;
do {
- if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
continue;
- list_for_each_entry(pl, &p->bpf.progs[type], node) {
+ list_for_each_entry(pl, &p->bpf.progs[atype], node) {
if (!prog_list_prog(pl))
continue;
@@ -269,10 +269,10 @@ static int compute_effective_progs(struct cgroup *cgrp,
}
static void activate_effective_progs(struct cgroup *cgrp,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
struct bpf_prog_array *old_array)
{
- old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
+ old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
@@ -328,7 +328,7 @@ cleanup:
}
static int update_effective_progs(struct cgroup *cgrp,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup_subsys_state *css;
int err;
@@ -340,7 +340,7 @@ static int update_effective_progs(struct cgroup *cgrp,
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
- err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+ err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
if (err)
goto cleanup;
}
@@ -357,7 +357,7 @@ static int update_effective_progs(struct cgroup *cgrp,
continue;
}
- activate_effective_progs(desc, type, desc->bpf.inactive);
+ activate_effective_progs(desc, atype, desc->bpf.inactive);
desc->bpf.inactive = NULL;
}
@@ -436,11 +436,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
enum bpf_attach_type type, u32 flags)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
- struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog_list *pl;
+ struct list_head *progs;
int err;
if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
@@ -454,10 +455,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
/* replace_prog implies BPF_F_REPLACE, and vice versa */
return -EINVAL;
- if (!hierarchy_allows_attach(cgrp, type))
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+
+ if (!hierarchy_allows_attach(cgrp, atype))
return -EPERM;
- if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
+ if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
/* Disallow attaching non-overridable on top
* of existing overridable in this cgroup.
* Disallow attaching multi-prog if overridable or none
@@ -490,16 +497,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
pl->prog = prog;
pl->link = link;
bpf_cgroup_storages_assign(pl->storage, storage);
- cgrp->bpf.flags[type] = saved_flags;
+ cgrp->bpf.flags[atype] = saved_flags;
- err = update_effective_progs(cgrp, type);
+ err = update_effective_progs(cgrp, atype);
if (err)
goto cleanup;
if (old_prog)
bpf_prog_put(old_prog);
else
- static_branch_inc(&cgroup_bpf_enabled_key[type]);
+ static_branch_inc(&cgroup_bpf_enabled_key[atype]);
bpf_cgroup_storages_link(new_storage, cgrp, type);
return 0;
@@ -520,7 +527,7 @@ cleanup:
* all descendant cgroups. This function is guaranteed to succeed.
*/
static void replace_effective_prog(struct cgroup *cgrp,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
struct bpf_cgroup_link *link)
{
struct bpf_prog_array_item *item;
@@ -539,10 +546,10 @@ static void replace_effective_prog(struct cgroup *cgrp,
/* find position of link in effective progs array */
for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
- if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
continue;
- head = &cg->bpf.progs[type];
+ head = &cg->bpf.progs[atype];
list_for_each_entry(pl, head, node) {
if (!prog_list_prog(pl))
continue;
@@ -554,7 +561,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
found:
BUG_ON(!cg);
progs = rcu_dereference_protected(
- desc->bpf.effective[type],
+ desc->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
item = &progs->items[pos];
WRITE_ONCE(item->prog, link->link.prog);
@@ -574,11 +581,18 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
struct bpf_cgroup_link *link,
struct bpf_prog *new_prog)
{
- struct list_head *progs = &cgrp->bpf.progs[link->type];
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
struct bpf_prog_list *pl;
+ struct list_head *progs;
bool found = false;
+ atype = to_cgroup_bpf_attach_type(link->type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+
if (link->link.prog->type != new_prog->type)
return -EINVAL;
@@ -592,7 +606,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
return -ENOENT;
old_prog = xchg(&link->link.prog, new_prog);
- replace_effective_prog(cgrp, link->type, link);
+ replace_effective_prog(cgrp, atype, link);
bpf_prog_put(old_prog);
return 0;
}
@@ -667,12 +681,20 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
- struct list_head *progs = &cgrp->bpf.progs[type];
- u32 flags = cgrp->bpf.flags[type];
- struct bpf_prog_list *pl;
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
+ struct bpf_prog_list *pl;
+ struct list_head *progs;
+ u32 flags;
int err;
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+ flags = cgrp->bpf.flags[atype];
+
if (prog && link)
/* only one of prog or link can be specified */
return -EINVAL;
@@ -686,7 +708,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
pl->prog = NULL;
pl->link = NULL;
- err = update_effective_progs(cgrp, type);
+ err = update_effective_progs(cgrp, atype);
if (err)
goto cleanup;
@@ -695,10 +717,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
kfree(pl);
if (list_empty(progs))
/* last program was detached, reset flags to zero */
- cgrp->bpf.flags[type] = 0;
+ cgrp->bpf.flags[atype] = 0;
if (old_prog)
bpf_prog_put(old_prog);
- static_branch_dec(&cgroup_bpf_enabled_key[type]);
+ static_branch_dec(&cgroup_bpf_enabled_key[atype]);
return 0;
cleanup:
@@ -714,13 +736,21 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
{
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
enum bpf_attach_type type = attr->query.attach_type;
- struct list_head *progs = &cgrp->bpf.progs[type];
- u32 flags = cgrp->bpf.flags[type];
+ enum cgroup_bpf_attach_type atype;
struct bpf_prog_array *effective;
+ struct list_head *progs;
struct bpf_prog *prog;
int cnt, ret = 0, i;
+ u32 flags;
- effective = rcu_dereference_protected(cgrp->bpf.effective[type],
+ atype = to_cgroup_bpf_attach_type(type);
+ if (atype < 0)
+ return -EINVAL;
+
+ progs = &cgrp->bpf.progs[atype];
+ flags = cgrp->bpf.flags[atype];
+
+ effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
lockdep_is_held(&cgroup_mutex));
if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
@@ -925,14 +955,14 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
link->cgroup = cgrp;
link->type = attr->link_create.attach_type;
- err = bpf_link_prime(&link->link, &link_primer);
+ err = bpf_link_prime(&link->link, &link_primer);
if (err) {
kfree(link);
goto out_put_cgroup;
}
- err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
- BPF_F_ALLOW_MULTI);
+ err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
+ link->type, BPF_F_ALLOW_MULTI);
if (err) {
bpf_link_cleanup(&link_primer);
goto out_put_cgroup;
@@ -986,7 +1016,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
*/
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
@@ -1008,12 +1038,12 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
/* compute pointers for the bpf prog */
bpf_compute_and_save_data_end(skb, &saved_data_end);
- if (type == BPF_CGROUP_INET_EGRESS) {
+ if (atype == CGROUP_INET_EGRESS) {
ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
- cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
+ cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
} else {
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
- __bpf_prog_run_save_cb);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
+ __bpf_prog_run_save_cb);
ret = (ret == 1 ? 0 : -EPERM);
}
bpf_restore_data_end(skb, saved_data_end);
@@ -1038,12 +1068,12 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
* and if it returned != 1 during execution. In all other cases, 0 is returned.
*/
int __cgroup_bpf_run_filter_sk(struct sock *sk,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
int ret;
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run);
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -1065,7 +1095,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
*/
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
- enum bpf_attach_type type,
+ enum cgroup_bpf_attach_type atype,
void *t_ctx,
u32 *flags)
{
@@ -1090,8 +1120,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
}
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx,
- BPF_PROG_RUN, flags);
+ ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run, flags);
return ret == 1 ? 0 : -EPERM;
}
@@ -1115,19 +1145,19 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
*/
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
int ret;
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
- BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
+ bpf_prog_run);
return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
- short access, enum bpf_attach_type type)
+ short access, enum cgroup_bpf_attach_type atype)
{
struct cgroup *cgrp;
struct bpf_cgroup_dev_ctx ctx = {
@@ -1135,12 +1165,12 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
.major = major,
.minor = minor,
};
- int allow = 1;
+ int allow;
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
- BPF_PROG_RUN);
+ allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+ bpf_prog_run);
rcu_read_unlock();
return !allow;
@@ -1231,7 +1261,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
struct ctl_table *table, int write,
char **buf, size_t *pcount, loff_t *ppos,
- enum bpf_attach_type type)
+ enum cgroup_bpf_attach_type atype)
{
struct bpf_sysctl_kern ctx = {
.head = head,
@@ -1271,7 +1301,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
rcu_read_lock();
cgrp = task_dfl_cgroup(current);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
rcu_read_unlock();
kfree(ctx.cur_val);
@@ -1289,7 +1319,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
#ifdef CONFIG_NET
static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
- enum bpf_attach_type attach_type)
+ enum cgroup_bpf_attach_type attach_type)
{
struct bpf_prog_array *prog_array;
bool empty;
@@ -1364,7 +1394,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
* attached to the hook so we don't waste time allocating
* memory and locking the socket.
*/
- if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
+ if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT))
return 0;
/* Allocate a bit more than the initial user buffer for
@@ -1385,8 +1415,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
- &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
+ &ctx, bpf_prog_run);
release_sock(sk);
if (!ret) {
@@ -1460,7 +1490,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
* attached to the hook so we don't waste time allocating
* memory and locking the socket.
*/
- if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
+ if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT))
return retval;
ctx.optlen = max_optlen;
@@ -1495,8 +1525,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
}
lock_sock(sk);
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
- &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+ &ctx, bpf_prog_run);
release_sock(sk);
if (!ret) {
@@ -1556,8 +1586,8 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
* be called if that data shouldn't be "exported".
*/
- ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
- &ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+ &ctx, bpf_prog_run);
if (!ret)
return -EPERM;
@@ -1846,15 +1876,41 @@ const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
const struct bpf_prog_ops cg_sysctl_prog_ops = {
};
+#ifdef CONFIG_NET
+BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
+{
+ const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;
+
+ return net->net_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
+ .func = bpf_get_netns_cookie_sockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+#endif
+
static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
#ifdef CONFIG_NET
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sockopt_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_setsockopt:
+ if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
+ return &bpf_sk_setsockopt_proto;
+ return NULL;
+ case BPF_FUNC_getsockopt:
+ if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
+ return &bpf_sk_getsockopt_proto;
+ return NULL;
#endif
#ifdef CONFIG_INET
case BPF_FUNC_tcp_sock:
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 82af6279992d..9f4636d021b1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1564,7 +1564,7 @@ select_insn:
if (unlikely(index >= array->map.max_entries))
goto out;
- if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
+ if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
goto out;
tail_call_cnt++;
@@ -1879,7 +1879,7 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
* @err: pointer to error variable
*
* Try to JIT eBPF program, if JIT is not available, use interpreter.
- * The BPF program will be executed via BPF_PROG_RUN() macro.
+ * The BPF program will be executed via bpf_prog_run() function.
*
* Return: the &fp argument along with &err set to 0 for success or
* a negative errno code on failure
@@ -2119,13 +2119,13 @@ int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *exclude_prog,
struct bpf_prog *include_prog,
+ u64 bpf_cookie,
struct bpf_prog_array **new_array)
{
int new_prog_cnt, carry_prog_cnt = 0;
- struct bpf_prog_array_item *existing;
+ struct bpf_prog_array_item *existing, *new;
struct bpf_prog_array *array;
bool found_exclude = false;
- int new_prog_idx = 0;
/* Figure out how many existing progs we need to carry over to
* the new array.
@@ -2162,20 +2162,27 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
if (!array)
return -ENOMEM;
+ new = array->items;
/* Fill in the new prog array */
if (carry_prog_cnt) {
existing = old_array->items;
- for (; existing->prog; existing++)
- if (existing->prog != exclude_prog &&
- existing->prog != &dummy_bpf_prog.prog) {
- array->items[new_prog_idx++].prog =
- existing->prog;
- }
+ for (; existing->prog; existing++) {
+ if (existing->prog == exclude_prog ||
+ existing->prog == &dummy_bpf_prog.prog)
+ continue;
+
+ new->prog = existing->prog;
+ new->bpf_cookie = existing->bpf_cookie;
+ new++;
+ }
}
- if (include_prog)
- array->items[new_prog_idx++].prog = include_prog;
- array->items[new_prog_idx].prog = NULL;
+ if (include_prog) {
+ new->prog = include_prog;
+ new->bpf_cookie = bpf_cookie;
+ new++;
+ }
+ new->prog = NULL;
*new_array = array;
return 0;
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 04010a5e2ddb..9aabf84afd4b 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -918,6 +918,20 @@ fmt_str:
num_spec++;
continue;
+ } else if (fmt[i] == 'c') {
+ if (!tmp_buf)
+ goto nocopy_fmt;
+
+ if (tmp_buf_end == tmp_buf) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ *tmp_buf = raw_args[num_spec];
+ tmp_buf++;
+ num_spec++;
+
+ continue;
}
sizeof_cur_arg = sizeof(int);
@@ -1318,10 +1332,12 @@ out:
}
const struct bpf_func_proto bpf_get_current_task_proto __weak;
+const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_proto __weak;
const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
+const struct bpf_func_proto bpf_task_pt_regs_proto __weak;
const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
@@ -1403,6 +1419,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return bpf_get_trace_printk_proto();
case BPF_FUNC_get_current_task:
return &bpf_get_current_task_proto;
+ case BPF_FUNC_get_current_task_btf:
+ return &bpf_get_current_task_btf_proto;
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
@@ -1417,6 +1435,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_snprintf_btf_proto;
case BPF_FUNC_snprintf:
return &bpf_snprintf_proto;
+ case BPF_FUNC_task_pt_regs:
+ return &bpf_task_pt_regs_proto;
default:
return NULL;
}
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 6fbc2abe9c91..e8eefdf8cf3e 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -530,14 +530,12 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
return res;
}
-BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)
-
const struct bpf_func_proto bpf_get_task_stack_proto = {
.func = bpf_get_task_stack,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
- .arg1_btf_id = &bpf_get_task_stack_btf_ids[0],
+ .arg1_btf_id = &btf_task_struct_ids[0],
.arg2_type = ARG_PTR_TO_UNINIT_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9a2068e39d23..4e50c0bfdb7d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1013,7 +1013,7 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
static void *__bpf_copy_key(void __user *ukey, u64 key_size)
{
if (key_size)
- return memdup_user(ukey, key_size);
+ return vmemdup_user(ukey, key_size);
if (ukey)
return ERR_PTR(-EINVAL);
@@ -1024,7 +1024,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size)
{
if (key_size)
- return memdup_bpfptr(ukey, key_size);
+ return kvmemdup_bpfptr(ukey, key_size);
if (!bpfptr_is_null(ukey))
return ERR_PTR(-EINVAL);
@@ -1076,7 +1076,7 @@ static int map_lookup_elem(union bpf_attr *attr)
value_size = bpf_map_value_size(map);
err = -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -1091,9 +1091,9 @@ static int map_lookup_elem(union bpf_attr *attr)
err = 0;
free_value:
- kfree(value);
+ kvfree(value);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1137,16 +1137,10 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
goto err_put;
}
- if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
- map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
- value_size = round_up(map->value_size, 8) * num_possible_cpus();
- else
- value_size = map->value_size;
+ value_size = bpf_map_value_size(map);
err = -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -1157,9 +1151,9 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
err = bpf_map_update_value(map, f, key, value, attr->flags);
free_value:
- kfree(value);
+ kvfree(value);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1211,7 +1205,7 @@ static int map_delete_elem(union bpf_attr *attr)
bpf_enable_instrumentation();
maybe_wait_bpf_programs(map);
out:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1253,7 +1247,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
err = -ENOMEM;
- next_key = kmalloc(map->key_size, GFP_USER);
+ next_key = kvmalloc(map->key_size, GFP_USER);
if (!next_key)
goto free_key;
@@ -1276,9 +1270,9 @@ out:
err = 0;
free_next_key:
- kfree(next_key);
+ kvfree(next_key);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -1305,7 +1299,7 @@ int generic_map_delete_batch(struct bpf_map *map,
if (!max_count)
return 0;
- key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
@@ -1332,7 +1326,7 @@ int generic_map_delete_batch(struct bpf_map *map,
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
- kfree(key);
+ kvfree(key);
return err;
}
@@ -1363,13 +1357,13 @@ int generic_map_update_batch(struct bpf_map *map,
if (!max_count)
return 0;
- key = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!key)
return -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value) {
- kfree(key);
+ kvfree(key);
return -ENOMEM;
}
@@ -1390,8 +1384,8 @@ int generic_map_update_batch(struct bpf_map *map,
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
- kfree(value);
- kfree(key);
+ kvfree(value);
+ kvfree(key);
return err;
}
@@ -1425,13 +1419,13 @@ int generic_map_lookup_batch(struct bpf_map *map,
if (put_user(0, &uattr->batch.count))
return -EFAULT;
- buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
+ buf_prevkey = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!buf_prevkey)
return -ENOMEM;
- buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
+ buf = kvmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
if (!buf) {
- kfree(buf_prevkey);
+ kvfree(buf_prevkey);
return -ENOMEM;
}
@@ -1491,8 +1485,8 @@ int generic_map_lookup_batch(struct bpf_map *map,
err = -EFAULT;
free_buf:
- kfree(buf_prevkey);
- kfree(buf);
+ kvfree(buf_prevkey);
+ kvfree(buf);
return err;
}
@@ -1547,7 +1541,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
value_size = bpf_map_value_size(map);
err = -ENOMEM;
- value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
+ value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
@@ -1579,9 +1573,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
err = 0;
free_value:
- kfree(value);
+ kvfree(value);
free_key:
- kfree(key);
+ kvfree(key);
err_put:
fdput(f);
return err;
@@ -2906,6 +2900,79 @@ static const struct bpf_link_ops bpf_raw_tp_link_lops = {
.fill_link_info = bpf_raw_tp_link_fill_link_info,
};
+#ifdef CONFIG_PERF_EVENTS
+struct bpf_perf_link {
+ struct bpf_link link;
+ struct file *perf_file;
+};
+
+static void bpf_perf_link_release(struct bpf_link *link)
+{
+ struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+ struct perf_event *event = perf_link->perf_file->private_data;
+
+ perf_event_free_bpf_prog(event);
+ fput(perf_link->perf_file);
+}
+
+static void bpf_perf_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_perf_link *perf_link = container_of(link, struct bpf_perf_link, link);
+
+ kfree(perf_link);
+}
+
+static const struct bpf_link_ops bpf_perf_link_lops = {
+ .release = bpf_perf_link_release,
+ .dealloc = bpf_perf_link_dealloc,
+};
+
+static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct bpf_link_primer link_primer;
+ struct bpf_perf_link *link;
+ struct perf_event *event;
+ struct file *perf_file;
+ int err;
+
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ perf_file = perf_event_get(attr->link_create.target_fd);
+ if (IS_ERR(perf_file))
+ return PTR_ERR(perf_file);
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link) {
+ err = -ENOMEM;
+ goto out_put_file;
+ }
+ bpf_link_init(&link->link, BPF_LINK_TYPE_PERF_EVENT, &bpf_perf_link_lops, prog);
+ link->perf_file = perf_file;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ goto out_put_file;
+ }
+
+ event = perf_file->private_data;
+ err = perf_event_set_bpf_prog(event, prog, attr->link_create.perf_event.bpf_cookie);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ goto out_put_file;
+ }
+ /* perf_event_set_bpf_prog() doesn't take its own refcnt on prog */
+ bpf_prog_inc(prog);
+
+ return bpf_link_settle(&link_primer);
+
+out_put_file:
+ fput(perf_file);
+ return err;
+}
+#endif /* CONFIG_PERF_EVENTS */
+
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
@@ -4147,15 +4214,26 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
if (ret)
goto out;
- if (prog->type == BPF_PROG_TYPE_EXT) {
+ switch (prog->type) {
+ case BPF_PROG_TYPE_EXT:
ret = tracing_bpf_link_attach(attr, uattr, prog);
goto out;
- }
-
- ptype = attach_type_to_prog_type(attr->link_create.attach_type);
- if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
- ret = -EINVAL;
- goto out;
+ case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_KPROBE:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ if (attr->link_create.attach_type != BPF_PERF_EVENT) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ptype = prog->type;
+ break;
+ default:
+ ptype = attach_type_to_prog_type(attr->link_create.attach_type);
+ if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
}
switch (ptype) {
@@ -4180,6 +4258,13 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
ret = bpf_xdp_link_attach(attr, prog);
break;
#endif
+#ifdef CONFIG_PERF_EVENTS
+ case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_KPROBE:
+ ret = bpf_perf_link_attach(attr, prog);
+ break;
+#endif
default:
ret = -EINVAL;
}
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index b68cb5d6d6eb..b48750bfba5a 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -525,7 +525,6 @@ static const struct seq_operations task_vma_seq_ops = {
};
BTF_ID_LIST(btf_task_file_ids)
-BTF_ID(struct, task_struct)
BTF_ID(struct, file)
BTF_ID(struct, vm_area_struct)
@@ -591,19 +590,19 @@ static int __init task_iter_init(void)
{
int ret;
- task_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
+ task_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
ret = bpf_iter_reg_target(&task_reg_info);
if (ret)
return ret;
- task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
- task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
+ task_file_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
+ task_file_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[0];
ret = bpf_iter_reg_target(&task_file_reg_info);
if (ret)
return ret;
- task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_file_ids[0];
- task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[2];
+ task_vma_reg_info.ctx_arg_info[0].btf_id = btf_task_struct_ids[0];
+ task_vma_reg_info.ctx_arg_info[1].btf_id = btf_task_file_ids[1];
return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index b2535acfe9db..fe1e857324e6 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -548,7 +548,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
u64_stats_update_end(&stats->syncp);
}
-/* The logic is similar to BPF_PROG_RUN, but with an explicit
+/* The logic is similar to bpf_prog_run(), but with an explicit
* rcu_read_lock() and migrate_disable() which are required
* for the trampoline. The macro is split into
* call __bpf_prog_enter
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9134aedfdb7d..047ac4b4703b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -12300,6 +12300,10 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
if (is_narrower_load && size < target_size) {
u8 shift = bpf_ctx_narrow_access_offset(
off, size, size_default) * 8;
+ if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
+ verbose(env, "bpf verifier narrow ctx load misconfigured\n");
+ return -EINVAL;
+ }
if (ctx_field_size <= 4) {
if (shift)
insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
@@ -12388,7 +12392,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
subprog_end = env->subprog_info[i + 1].start;
len = subprog_end - subprog_start;
- /* BPF_PROG_RUN doesn't call subprogs directly,
+ /* bpf_prog_run() doesn't call subprogs directly,
* hence main prog stats include the runtime of subprogs.
* subprogs don't have IDs and not reachable via prog_get_next_id
* func[i]->stats will never be accessed and stays NULL
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cb1f9b8392e..011cc5069b7b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4697,7 +4697,6 @@ errout:
}
static void perf_event_free_filter(struct perf_event *event);
-static void perf_event_free_bpf_prog(struct perf_event *event);
static void free_event_rcu(struct rcu_head *head)
{
@@ -5574,7 +5573,6 @@ static inline int perf_fget_light(int fd, struct fd *p)
static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
static int perf_copy_attr(struct perf_event_attr __user *uattr,
struct perf_event_attr *attr);
@@ -5637,7 +5635,22 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
return perf_event_set_filter(event, (void __user *)arg);
case PERF_EVENT_IOC_SET_BPF:
- return perf_event_set_bpf_prog(event, arg);
+ {
+ struct bpf_prog *prog;
+ int err;
+
+ prog = bpf_prog_get(arg);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ err = perf_event_set_bpf_prog(event, prog, 0);
+ if (err) {
+ bpf_prog_put(prog);
+ return err;
+ }
+
+ return 0;
+ }
case PERF_EVENT_IOC_PAUSE_OUTPUT: {
struct perf_buffer *rb;
@@ -9907,13 +9920,16 @@ static void bpf_overflow_handler(struct perf_event *event,
.data = data,
.event = event,
};
+ struct bpf_prog *prog;
int ret = 0;
ctx.regs = perf_arch_bpf_user_pt_regs(regs);
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
goto out;
rcu_read_lock();
- ret = BPF_PROG_RUN(event->prog, &ctx);
+ prog = READ_ONCE(event->prog);
+ if (prog)
+ ret = bpf_prog_run(prog, &ctx);
rcu_read_unlock();
out:
__this_cpu_dec(bpf_prog_active);
@@ -9923,10 +9939,10 @@ out:
event->orig_overflow_handler(event, data, regs);
}
-static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+static int perf_event_set_bpf_handler(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
{
- struct bpf_prog *prog;
-
if (event->overflow_handler_context)
/* hw breakpoint or kernel counter */
return -EINVAL;
@@ -9934,9 +9950,8 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
if (event->prog)
return -EEXIST;
- prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
+ if (prog->type != BPF_PROG_TYPE_PERF_EVENT)
+ return -EINVAL;
if (event->attr.precise_ip &&
prog->call_get_stack &&
@@ -9952,11 +9967,11 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
* attached to perf_sample_data, do not allow attaching BPF
* program that calls bpf_get_[stack|stackid].
*/
- bpf_prog_put(prog);
return -EPROTO;
}
event->prog = prog;
+ event->bpf_cookie = bpf_cookie;
event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
return 0;
@@ -9974,7 +9989,9 @@ static void perf_event_free_bpf_handler(struct perf_event *event)
bpf_prog_put(prog);
}
#else
-static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+static int perf_event_set_bpf_handler(struct perf_event *event,
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
{
return -EOPNOTSUPP;
}
@@ -10002,14 +10019,13 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
return false;
}
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
+ u64 bpf_cookie)
{
bool is_kprobe, is_tracepoint, is_syscall_tp;
- struct bpf_prog *prog;
- int ret;
if (!perf_event_is_tracing(event))
- return perf_event_set_bpf_handler(event, prog_fd);
+ return perf_event_set_bpf_handler(event, prog, bpf_cookie);
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
@@ -10018,41 +10034,27 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
/* bpf programs can only be attached to u/kprobe or tracepoint */
return -EINVAL;
- prog = bpf_prog_get(prog_fd);
- if (IS_ERR(prog))
- return PTR_ERR(prog);
-
if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
(is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
- (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
- /* valid fd, but invalid bpf program type */
- bpf_prog_put(prog);
+ (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
return -EINVAL;
- }
/* Kprobe override only works for kprobes, not uprobes. */
if (prog->kprobe_override &&
- !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
- bpf_prog_put(prog);
+ !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
return -EINVAL;
- }
if (is_tracepoint || is_syscall_tp) {
int off = trace_event_get_offsets(event->tp_event);
- if (prog->aux->max_ctx_offset > off) {
- bpf_prog_put(prog);
+ if (prog->aux->max_ctx_offset > off)
return -EACCES;
- }
}
- ret = perf_event_attach_bpf_prog(event, prog);
- if (ret)
- bpf_prog_put(prog);
- return ret;
+ return perf_event_attach_bpf_prog(event, prog, bpf_cookie);
}
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event)
{
if (!perf_event_is_tracing(event)) {
perf_event_free_bpf_handler(event);
@@ -10071,12 +10073,13 @@ static void perf_event_free_filter(struct perf_event *event)
{
}
-static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
+ u64 bpf_cookie)
{
return -ENOENT;
}
-static void perf_event_free_bpf_prog(struct perf_event *event)
+void perf_event_free_bpf_prog(struct perf_event *event)
{
}
#endif /* CONFIG_EVENT_TRACING */
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0da94e1d6af9..8e2eb950aa82 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -124,7 +124,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
* out of events when it was updated in between this and the
* rcu_dereference() which is accepted risk.
*/
- ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
+ ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
out:
__this_cpu_dec(bpf_prog_active);
@@ -714,13 +714,28 @@ BPF_CALL_0(bpf_get_current_task_btf)
return (unsigned long) current;
}
-BTF_ID_LIST_SINGLE(bpf_get_current_btf_ids, struct, task_struct)
-
-static const struct bpf_func_proto bpf_get_current_task_btf_proto = {
+const struct bpf_func_proto bpf_get_current_task_btf_proto = {
.func = bpf_get_current_task_btf,
.gpl_only = true,
.ret_type = RET_PTR_TO_BTF_ID,
- .ret_btf_id = &bpf_get_current_btf_ids[0],
+ .ret_btf_id = &btf_task_struct_ids[0],
+};
+
+BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
+{
+ return (unsigned long) task_pt_regs(task);
+}
+
+BTF_ID_LIST(bpf_task_pt_regs_ids)
+BTF_ID(struct, pt_regs)
+
+const struct bpf_func_proto bpf_task_pt_regs_proto = {
+ .func = bpf_task_pt_regs,
+ .gpl_only = true,
+ .arg1_type = ARG_PTR_TO_BTF_ID,
+ .arg1_btf_id = &btf_task_struct_ids[0],
+ .ret_type = RET_PTR_TO_BTF_ID,
+ .ret_btf_id = &bpf_task_pt_regs_ids[0],
};
BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
@@ -975,7 +990,34 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
.arg1_type = ARG_PTR_TO_CTX,
};
-const struct bpf_func_proto *
+BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
+{
+ struct bpf_trace_run_ctx *run_ctx;
+
+ run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
+ return run_ctx->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
+ .func = bpf_get_attach_cookie_trace,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
+{
+ return ctx->event->bpf_cookie;
+}
+
+static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
+ .func = bpf_get_attach_cookie_pe,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
+static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
@@ -1005,6 +1047,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_current_task_proto;
case BPF_FUNC_get_current_task_btf:
return &bpf_get_current_task_btf_proto;
+ case BPF_FUNC_task_pt_regs:
+ return &bpf_task_pt_regs_proto;
case BPF_FUNC_get_current_uid_gid:
return &bpf_get_current_uid_gid_proto;
case BPF_FUNC_get_current_comm:
@@ -1109,6 +1153,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
#endif
case BPF_FUNC_get_func_ip:
return &bpf_get_func_ip_proto_kprobe;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1219,6 +1265,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stackid_proto_tp;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto_tp;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_trace;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1326,6 +1374,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_perf_prog_read_value_proto;
case BPF_FUNC_read_branch_records:
return &bpf_read_branch_records_proto;
+ case BPF_FUNC_get_attach_cookie:
+ return &bpf_get_attach_cookie_proto_pe;
default:
return bpf_tracing_func_proto(func_id, prog);
}
@@ -1675,7 +1725,8 @@ static DEFINE_MUTEX(bpf_event_mutex);
#define BPF_TRACE_MAX_PROGS 64
int perf_event_attach_bpf_prog(struct perf_event *event,
- struct bpf_prog *prog)
+ struct bpf_prog *prog,
+ u64 bpf_cookie)
{
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
@@ -1702,12 +1753,13 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
goto unlock;
}
- ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
+ ret = bpf_prog_array_copy(old_array, NULL, prog, bpf_cookie, &new_array);
if (ret < 0)
goto unlock;
/* set the new array to event->tp_event and set event->prog */
event->prog = prog;
+ event->bpf_cookie = bpf_cookie;
rcu_assign_pointer(event->tp_event->prog_array, new_array);
bpf_prog_array_free(old_array);
@@ -1728,7 +1780,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
goto unlock;
old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
- ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
+ ret = bpf_prog_array_copy(old_array, event->prog, NULL, 0, &new_array);
if (ret == -ENOENT)
goto unlock;
if (ret < 0) {
@@ -1816,7 +1868,7 @@ void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
cant_sleep();
rcu_read_lock();
- (void) BPF_PROG_RUN(prog, args);
+ (void) bpf_prog_run(prog, args);
rcu_read_unlock();
}
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 44d8197bbffb..830a18ecffc8 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5163,7 +5163,7 @@ static struct bpf_test tests[] = {
{ { 0, -1 } }
},
{
- "ALU64_ARSH_K: Zero shoft",
+ "ALU64_ARSH_K: Zero shift",
.u.insns_int = {
BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
BPF_ALU64_IMM(BPF_ARSH, R0, 0),
@@ -8616,7 +8616,7 @@ static int __run_one(const struct bpf_prog *fp, const void *data,
start = ktime_get_ns();
for (i = 0; i < runs; i++)
- ret = BPF_PROG_RUN(fp, data);
+ ret = bpf_prog_run(fp, data);
finish = ktime_get_ns();
migrate_enable();
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 4b855af267b1..2eb0e55ef54d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -116,7 +116,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
- *retval = BPF_PROG_RUN(prog, ctx);
+ *retval = bpf_prog_run(prog, ctx);
} while (bpf_test_timer_continue(&t, repeat, &ret, time));
bpf_reset_run_ctx(old_ctx);
bpf_test_timer_leave(&t);
@@ -327,7 +327,7 @@ __bpf_prog_test_run_raw_tp(void *data)
struct bpf_raw_tp_test_run_info *info = data;
rcu_read_lock();
- info->retval = BPF_PROG_RUN(info->prog, info->ctx);
+ info->retval = bpf_prog_run(info->prog, info->ctx);
rcu_read_unlock();
}
@@ -989,7 +989,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
bpf_test_timer_enter(&t);
do {
ctx.selected_sk = NULL;
- retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
+ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run);
} while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
bpf_test_timer_leave(&t);
diff --git a/net/core/filter.c b/net/core/filter.c
index 3aca07c44fad..2e32cee2c469 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -114,7 +114,7 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);
* Run the eBPF program and then cut skb->data to correct size returned by
* the program. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
+ * wrapper to bpf_prog_run. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -4676,6 +4676,30 @@ static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
};
+BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
+{
+ return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
+ .func = bpf_get_netns_cookie_sock_ops,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+
+BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
+{
+ return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
+}
+
+static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
+ .func = bpf_get_netns_cookie_sk_msg,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
+};
+
BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
{
struct sock *sk = sk_to_full_sk(skb->sk);
@@ -5027,6 +5051,12 @@ err_clear:
BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
int, optname, char *, optval, int, optlen)
{
+ if (level == SOL_TCP && optname == TCP_CONGESTION) {
+ if (optlen >= sizeof("cdg") - 1 &&
+ !strncmp("cdg", optval, optlen))
+ return -ENOTSUPP;
+ }
+
return _bpf_setsockopt(sk, level, optname, optval, optlen);
}
@@ -7491,6 +7521,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sock_ops_proto;
#ifdef CONFIG_INET
case BPF_FUNC_load_hdr_opt:
return &bpf_sock_ops_load_hdr_opt_proto;
@@ -7537,6 +7569,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_get_netns_cookie:
+ return &bpf_get_netns_cookie_sk_msg_proto;
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;
@@ -10115,7 +10149,7 @@ struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
enum sk_action action;
bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
- action = BPF_PROG_RUN(prog, &reuse_kern);
+ action = bpf_prog_run(prog, &reuse_kern);
if (action == SK_PASS)
return reuse_kern.selected_sk;
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index e33fde06d528..dd4cf01d1e0a 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -103,7 +103,7 @@ static struct bpf_prog *ptp_insns __read_mostly;
unsigned int ptp_classify_raw(const struct sk_buff *skb)
{
- return BPF_PROG_RUN(ptp_insns, skb);
+ return bpf_prog_run(ptp_insns, skb);
}
EXPORT_SYMBOL_GPL(ptp_classify_raw);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index ae5fa4338d9c..e252b8ec2b85 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1494,6 +1494,7 @@ void sock_map_unhash(struct sock *sk)
rcu_read_unlock();
saved_unhash(sk);
}
+EXPORT_SYMBOL_GPL(sock_map_unhash);
void sock_map_close(struct sock *sk, long timeout)
{
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0e4d758c2585..1d816a5fd3eb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -452,7 +452,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
- BPF_CGROUP_INET4_BIND, &flags);
+ CGROUP_INET4_BIND, &flags);
if (err)
return err;
@@ -781,7 +781,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET4_GETPEERNAME,
+ CGROUP_INET4_GETPEERNAME,
NULL);
} else {
__be32 addr = inet->inet_rcv_saddr;
@@ -790,7 +790,7 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET4_GETSOCKNAME,
+ CGROUP_INET4_GETSOCKNAME,
NULL);
}
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 9e41eff4a685..0dcee9df1326 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -10,6 +10,9 @@
#include <net/tcp.h>
#include <net/bpf_sk_storage.h>
+/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */
+extern struct bpf_struct_ops bpf_tcp_congestion_ops;
+
static u32 optional_ops[] = {
offsetof(struct tcp_congestion_ops, init),
offsetof(struct tcp_congestion_ops, release),
@@ -163,6 +166,19 @@ static const struct bpf_func_proto bpf_tcp_send_ack_proto = {
.arg2_type = ARG_ANYTHING,
};
+static u32 prog_ops_moff(const struct bpf_prog *prog)
+{
+ const struct btf_member *m;
+ const struct btf_type *t;
+ u32 midx;
+
+ midx = prog->expected_attach_type;
+ t = bpf_tcp_congestion_ops.type;
+ m = &btf_type_member(t)[midx];
+
+ return btf_member_bit_offset(t, m) / 8;
+}
+
static const struct bpf_func_proto *
bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
const struct bpf_prog *prog)
@@ -174,6 +190,28 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
+ case BPF_FUNC_setsockopt:
+ /* Does not allow release() to call setsockopt.
+ * release() is called when the current bpf-tcp-cc
+ * is retiring. It is not allowed to call
+ * setsockopt() to make further changes which
+ * may potentially allocate new resources.
+ */
+ if (prog_ops_moff(prog) !=
+ offsetof(struct tcp_congestion_ops, release))
+ return &bpf_sk_setsockopt_proto;
+ return NULL;
+ case BPF_FUNC_getsockopt:
+ /* Since get/setsockopt is usually expected to
+ * be available together, disable getsockopt for
+ * release also to avoid usage surprise.
+ * The bpf-tcp-cc already has a more powerful way
+ * to read tcp_sock from the PTR_TO_BTF_ID.
+ */
+ if (prog_ops_moff(prog) !=
+ offsetof(struct tcp_congestion_ops, release))
+ return &bpf_sk_getsockopt_proto;
+ return NULL;
default:
return bpf_base_func_proto(func_id);
}
@@ -286,9 +324,6 @@ static void bpf_tcp_ca_unreg(void *kdata)
tcp_unregister_congestion_control(kdata);
}
-/* Avoid sparse warning. It is only used in bpf_struct_ops.c. */
-extern struct bpf_struct_ops bpf_tcp_congestion_ops;
-
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1a742b710e54..8851c9463b4b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1143,7 +1143,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rcu_read_unlock();
}
- if (cgroup_bpf_enabled(BPF_CGROUP_UDP4_SENDMSG) && !connected) {
+ if (cgroup_bpf_enabled(CGROUP_UDP4_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP4_SENDMSG_LOCK(sk,
(struct sockaddr *)usin, &ipc.addr);
if (err)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d92c90d97763..b5878bb8e419 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -455,7 +455,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
- BPF_CGROUP_INET6_BIND, &flags);
+ CGROUP_INET6_BIND, &flags);
if (err)
return err;
@@ -532,7 +532,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETPEERNAME,
+ CGROUP_INET6_GETPEERNAME,
NULL);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -541,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETSOCKNAME,
+ CGROUP_INET6_GETSOCKNAME,
NULL);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c5e15e94bb00..ea53847b5b7e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1475,7 +1475,7 @@ do_udp_sendmsg:
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
- if (cgroup_bpf_enabled(BPF_CGROUP_UDP6_SENDMSG) && !connected) {
+ if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6, &fl6.saddr);
if (err)
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 13cf3f9b5938..849ac552a154 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -90,7 +90,7 @@ static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_bpf_info *info = par->matchinfo;
- return BPF_PROG_RUN(info->filter, skb);
+ return bpf_prog_run(info->filter, skb);
}
static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 040807aa15b9..5c36013339e1 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -47,11 +47,11 @@ static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act,
if (at_ingress) {
__skb_push(skb, skb->mac_len);
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(filter, skb);
+ filter_res = bpf_prog_run(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(filter, skb);
+ filter_res = bpf_prog_run(filter, skb);
}
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 3b472bafdc9d..df19a847829e 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -96,11 +96,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = bpf_prog_run(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
bpf_compute_data_pointers(skb);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = bpf_prog_run(prog->filter, skb);
}
if (prog->exts_integrated) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4cf0b1c47f0f..7cad52ba9cde 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -113,6 +113,7 @@
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
+#include <linux/btf_ids.h>
#include "scm.h"
@@ -678,6 +679,8 @@ static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -731,6 +734,7 @@ static const struct proto_ops unix_stream_ops = {
.shutdown = unix_shutdown,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
+ .read_sock = unix_stream_read_sock,
.mmap = sock_no_mmap,
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
@@ -794,17 +798,35 @@ static void unix_close(struct sock *sk, long timeout)
*/
}
-struct proto unix_proto = {
- .name = "UNIX",
+static void unix_unhash(struct sock *sk)
+{
+ /* Nothing to do here, unix socket does not need a ->unhash().
+ * This is merely for sockmap.
+ */
+}
+
+struct proto unix_dgram_proto = {
+ .name = "UNIX-DGRAM",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
.close = unix_close,
#ifdef CONFIG_BPF_SYSCALL
- .psock_update_sk_prot = unix_bpf_update_proto,
+ .psock_update_sk_prot = unix_dgram_bpf_update_proto,
#endif
};
-static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
+struct proto unix_stream_proto = {
+ .name = "UNIX-STREAM",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct unix_sock),
+ .close = unix_close,
+ .unhash = unix_unhash,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = unix_stream_bpf_update_proto,
+#endif
+};
+
+static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
struct sock *sk = NULL;
struct unix_sock *u;
@@ -813,7 +835,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
- sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
+ if (type == SOCK_STREAM)
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
+ else /*dgram and seqpacket */
+ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
+
if (!sk)
goto out;
@@ -875,7 +901,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
+ return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
@@ -1289,7 +1315,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -ENOMEM;
/* create new sock for complete connection */
- newsk = unix_create1(sock_net(sk), NULL, 0);
+ newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
if (newsk == NULL)
goto out;
@@ -2326,8 +2352,10 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
struct sock *sk = sock->sk;
#ifdef CONFIG_BPF_SYSCALL
- if (sk->sk_prot != &unix_proto)
- return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot != &unix_dgram_proto)
+ return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
flags & ~MSG_DONTWAIT, NULL);
#endif
return __unix_dgram_recvmsg(sk, msg, size, flags);
@@ -2497,6 +2525,15 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
}
#endif
+static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ if (unlikely(sk->sk_state != TCP_ESTABLISHED))
+ return -ENOTCONN;
+
+ return unix_read_sock(sk, desc, recv_actor);
+}
+
static int unix_stream_read_generic(struct unix_stream_read_state *state,
bool freezable)
{
@@ -2722,6 +2759,20 @@ static int unix_stream_read_actor(struct sk_buff *skb,
return ret ?: chunk;
}
+int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t size, int flags)
+{
+ struct unix_stream_read_state state = {
+ .recv_actor = unix_stream_read_actor,
+ .socket = sk->sk_socket,
+ .msg = msg,
+ .size = size,
+ .flags = flags
+ };
+
+ return unix_stream_read_generic(&state, true);
+}
+
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
@@ -2733,6 +2784,14 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
.flags = flags
};
+#ifdef CONFIG_BPF_SYSCALL
+ struct sock *sk = sock->sk;
+ const struct proto *prot = READ_ONCE(sk->sk_prot);
+
+ if (prot != &unix_stream_proto)
+ return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
+ flags & ~MSG_DONTWAIT, NULL);
+#endif
return unix_stream_read_generic(&state, true);
}
@@ -2793,7 +2852,10 @@ static int unix_shutdown(struct socket *sock, int mode)
(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
int peer_mode = 0;
+ const struct proto *prot = READ_ONCE(other->sk_prot);
+ if (prot->unhash)
+ prot->unhash(other);
if (mode&RCV_SHUTDOWN)
peer_mode |= SEND_SHUTDOWN;
if (mode&SEND_SHUTDOWN)
@@ -2802,10 +2864,12 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
- if (peer_mode == SHUTDOWN_MASK)
+ if (peer_mode == SHUTDOWN_MASK) {
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
- else if (peer_mode & RCV_SHUTDOWN)
+ other->sk_state = TCP_CLOSE;
+ } else if (peer_mode & RCV_SHUTDOWN) {
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
+ }
}
if (other)
sock_put(other);
@@ -3150,6 +3214,64 @@ static const struct seq_operations unix_seq_ops = {
.stop = unix_seq_stop,
.show = unix_seq_show,
};
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__unix {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct unix_sock *, unix_sk);
+ uid_t uid __aligned(8);
+};
+
+static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
+ struct unix_sock *unix_sk, uid_t uid)
+{
+ struct bpf_iter__unix ctx;
+
+ meta->seq_num--; /* skip SEQ_START_TOKEN */
+ ctx.meta = meta;
+ ctx.unix_sk = unix_sk;
+ ctx.uid = uid;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+ uid_t uid;
+
+ if (v == SEQ_START_TOKEN)
+ return 0;
+
+ uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, false);
+ return unix_prog_seq_show(prog, &meta, v, uid);
+}
+
+static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
+{
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ if (!v) {
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, true);
+ if (prog)
+ (void)unix_prog_seq_show(prog, &meta, v, 0);
+ }
+
+ unix_seq_stop(seq, v);
+}
+
+static const struct seq_operations bpf_iter_unix_seq_ops = {
+ .start = unix_seq_start,
+ .next = unix_seq_next,
+ .stop = bpf_iter_unix_seq_stop,
+ .show = bpf_iter_unix_seq_show,
+};
+#endif
#endif
static const struct net_proto_family unix_family_ops = {
@@ -3190,13 +3312,48 @@ static struct pernet_operations unix_net_ops = {
.exit = unix_net_exit,
};
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
+ struct unix_sock *unix_sk, uid_t uid)
+
+static const struct bpf_iter_seq_info unix_seq_info = {
+ .seq_ops = &bpf_iter_unix_seq_ops,
+ .init_seq_private = bpf_iter_init_seq_net,
+ .fini_seq_private = bpf_iter_fini_seq_net,
+ .seq_priv_size = sizeof(struct seq_net_private),
+};
+
+static struct bpf_iter_reg unix_reg_info = {
+ .target = "unix",
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__unix, unix_sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &unix_seq_info,
+};
+
+static void __init bpf_iter_register(void)
+{
+ unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
+ if (bpf_iter_reg_target(&unix_reg_info))
+ pr_warn("Warning: could not register bpf iterator unix\n");
+}
+#endif
+
static int __init af_unix_init(void)
{
int rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
- rc = proto_register(&unix_proto, 1);
+ rc = proto_register(&unix_dgram_proto, 1);
+ if (rc != 0) {
+ pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
+ goto out;
+ }
+
+ rc = proto_register(&unix_stream_proto, 1);
if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
goto out;
@@ -3205,6 +3362,11 @@ static int __init af_unix_init(void)
sock_register(&unix_family_ops);
register_pernet_subsys(&unix_net_ops);
unix_bpf_build_proto();
+
+#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+ bpf_iter_register();
+#endif
+
out:
return rc;
}
@@ -3212,7 +3374,8 @@ out:
static void __exit af_unix_exit(void)
{
sock_unregister(PF_UNIX);
- proto_unregister(&unix_proto);
+ proto_unregister(&unix_dgram_proto);
+ proto_unregister(&unix_stream_proto);
unregister_pernet_subsys(&unix_net_ops);
}
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 20f53575b5c9..b927e2baae50 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -38,9 +38,18 @@ static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
return ret;
}
-static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
- size_t len, int nonblock, int flags,
- int *addr_len)
+static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags)
+{
+ if (sk->sk_type == SOCK_DGRAM)
+ return __unix_dgram_recvmsg(sk, msg, len, flags);
+ else
+ return __unix_stream_recvmsg(sk, msg, len, flags);
+}
+
+static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int nonblock, int flags,
+ int *addr_len)
{
struct unix_sock *u = unix_sk(sk);
struct sk_psock *psock;
@@ -48,14 +57,14 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
psock = sk_psock_get(sk);
if (unlikely(!psock))
- return __unix_dgram_recvmsg(sk, msg, len, flags);
+ return __unix_recvmsg(sk, msg, len, flags);
mutex_lock(&u->iolock);
if (!skb_queue_empty(&sk->sk_receive_queue) &&
sk_psock_queue_empty(psock)) {
mutex_unlock(&u->iolock);
sk_psock_put(sk, psock);
- return __unix_dgram_recvmsg(sk, msg, len, flags);
+ return __unix_recvmsg(sk, msg, len, flags);
}
msg_bytes_ready:
@@ -71,7 +80,7 @@ msg_bytes_ready:
goto msg_bytes_ready;
mutex_unlock(&u->iolock);
sk_psock_put(sk, psock);
- return __unix_dgram_recvmsg(sk, msg, len, flags);
+ return __unix_recvmsg(sk, msg, len, flags);
}
copied = -EAGAIN;
}
@@ -80,30 +89,55 @@ msg_bytes_ready:
return copied;
}
-static struct proto *unix_prot_saved __read_mostly;
-static DEFINE_SPINLOCK(unix_prot_lock);
-static struct proto unix_bpf_prot;
+static struct proto *unix_dgram_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_dgram_prot_lock);
+static struct proto unix_dgram_bpf_prot;
+
+static struct proto *unix_stream_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_stream_prot_lock);
+static struct proto unix_stream_bpf_prot;
-static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
*prot = *base;
prot->close = sock_map_close;
- prot->recvmsg = unix_dgram_bpf_recvmsg;
+ prot->recvmsg = unix_bpf_recvmsg;
+}
+
+static void unix_stream_bpf_rebuild_protos(struct proto *prot,
+ const struct proto *base)
+{
+ *prot = *base;
+ prot->close = sock_map_close;
+ prot->recvmsg = unix_bpf_recvmsg;
+ prot->unhash = sock_map_unhash;
+}
+
+static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
+{
+ if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
+ spin_lock_bh(&unix_dgram_prot_lock);
+ if (likely(ops != unix_dgram_prot_saved)) {
+ unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
+ smp_store_release(&unix_dgram_prot_saved, ops);
+ }
+ spin_unlock_bh(&unix_dgram_prot_lock);
+ }
}
-static void unix_bpf_check_needs_rebuild(struct proto *ops)
+static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
{
- if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) {
- spin_lock_bh(&unix_prot_lock);
- if (likely(ops != unix_prot_saved)) {
- unix_bpf_rebuild_protos(&unix_bpf_prot, ops);
- smp_store_release(&unix_prot_saved, ops);
+ if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
+ spin_lock_bh(&unix_stream_prot_lock);
+ if (likely(ops != unix_stream_prot_saved)) {
+ unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
+ smp_store_release(&unix_stream_prot_saved, ops);
}
- spin_unlock_bh(&unix_prot_lock);
+ spin_unlock_bh(&unix_stream_prot_lock);
}
}
-int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
if (sk->sk_type != SOCK_DGRAM)
return -EOPNOTSUPP;
@@ -114,12 +148,27 @@ int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
return 0;
}
- unix_bpf_check_needs_rebuild(psock->sk_proto);
- WRITE_ONCE(sk->sk_prot, &unix_bpf_prot);
+ unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot);
+ return 0;
+}
+
+int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+ if (restore) {
+ sk->sk_write_space = psock->saved_write_space;
+ WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+ return 0;
+ }
+
+ unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
+ WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot);
return 0;
}
void __init unix_bpf_build_proto(void)
{
- unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto);
+ unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
+ unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);
+
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 036998d11ded..4dc20be5fb96 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -39,11 +39,6 @@ tprogs-y += lwt_len_hist
tprogs-y += xdp_tx_iptunnel
tprogs-y += test_map_in_map
tprogs-y += per_socket_stats_example
-tprogs-y += xdp_redirect
-tprogs-y += xdp_redirect_map
-tprogs-y += xdp_redirect_map_multi
-tprogs-y += xdp_redirect_cpu
-tprogs-y += xdp_monitor
tprogs-y += xdp_rxq_info
tprogs-y += syscall_tp
tprogs-y += cpustat
@@ -57,11 +52,18 @@ tprogs-y += xdp_sample_pkts
tprogs-y += ibumad
tprogs-y += hbm
+tprogs-y += xdp_redirect_cpu
+tprogs-y += xdp_redirect_map_multi
+tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect
+tprogs-y += xdp_monitor
+
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o
TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
+XDP_SAMPLE := xdp_sample_user.o
fds_example-objs := fds_example.o
sockex1-objs := sockex1_user.o
@@ -98,11 +100,6 @@ lwt_len_hist-objs := lwt_len_hist_user.o
xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o
test_map_in_map-objs := test_map_in_map_user.o
per_socket_stats_example-objs := cookie_uid_helper_example.o
-xdp_redirect-objs := xdp_redirect_user.o
-xdp_redirect_map-objs := xdp_redirect_map_user.o
-xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o
-xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
-xdp_monitor-objs := xdp_monitor_user.o
xdp_rxq_info-objs := xdp_rxq_info_user.o
syscall_tp-objs := syscall_tp_user.o
cpustat-objs := cpustat_user.o
@@ -116,6 +113,12 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o
ibumad-objs := ibumad_user.o
hbm-objs := hbm.o $(CGROUP_HELPERS)
+xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE)
+xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE)
+xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE)
+xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE)
+xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE)
+
# Tell kbuild to always build the programs
always-y := $(tprogs-y)
always-y += sockex1_kern.o
@@ -160,11 +163,6 @@ always-y += tcp_clamp_kern.o
always-y += tcp_basertt_kern.o
always-y += tcp_tos_reflect_kern.o
always-y += tcp_dumpstats_kern.o
-always-y += xdp_redirect_kern.o
-always-y += xdp_redirect_map_kern.o
-always-y += xdp_redirect_map_multi_kern.o
-always-y += xdp_redirect_cpu_kern.o
-always-y += xdp_monitor_kern.o
always-y += xdp_rxq_info_kern.o
always-y += xdp2skb_meta_kern.o
always-y += syscall_tp_kern.o
@@ -276,6 +274,11 @@ $(LIBBPF): FORCE
$(MAKE) -C $(dir $@) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \
LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ O=
+BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool
+BPFTOOL := $(BPFTOOLDIR)/bpftool
+$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)
+ $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../
+
$(obj)/syscall_nrs.h: $(obj)/syscall_nrs.s FORCE
$(call filechk,offsets,__SYSCALL_NRS_H__)
@@ -306,6 +309,12 @@ verify_target_bpf: verify_cmds
$(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF)
$(src)/*.c: verify_target_bpf $(LIBBPF)
+$(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h
+$(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h
+$(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h
+$(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h
+$(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h
+
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
$(obj)/hbm.o: $(src)/hbm.h
@@ -313,6 +322,76 @@ $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h
-include $(BPF_SAMPLES_PATH)/Makefile.target
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
+
+$(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+clean-files += vmlinux.h
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+endef
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+
+$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o
+$(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o
+
+$(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/xdp_sample_shared.h
+ @echo " CLANG-BPF " $@
+ $(Q)$(CLANG) -g -O2 -target bpf -D__TARGET_ARCH_$(SRCARCH) \
+ -Wno-compare-distinct-pointer-types -I$(srctree)/include \
+ -I$(srctree)/samples/bpf -I$(srctree)/tools/include \
+ -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \
+ -c $(filter %.bpf.c,$^) -o $@
+
+LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \
+ xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h
+clean-files += $(LINKED_SKELS)
+
+xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o
+xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o
+xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o
+xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o
+xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o
+
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
+BPF_SRCS_LINKED := $(notdir $(wildcard $(src)/*.bpf.c))
+BPF_OBJS_LINKED := $(patsubst %.bpf.c,$(obj)/%.bpf.o, $(BPF_SRCS_LINKED))
+BPF_SKELS_LINKED := $(addprefix $(obj)/,$(LINKED_SKELS))
+
+$(BPF_SKELS_LINKED): $(BPF_OBJS_LINKED) $(BPFTOOL)
+ @echo " BPF GEN-OBJ " $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen object $(@:.skel.h=.lbpf.o) $(addprefix $(obj)/,$($(@F)-deps))
+ @echo " BPF GEN-SKEL" $(@:.skel.h=)
+ $(Q)$(BPFTOOL) gen skeleton $(@:.skel.h=.lbpf.o) name $(notdir $(@:.skel.h=)) > $@
+
# asm/sysreg.h - inline assembly used by it is incompatible with llvm.
# But, there is no easy way to fix it, so just exclude it since it is
# useless for BPF samples.
diff --git a/samples/bpf/Makefile.target b/samples/bpf/Makefile.target
index 7621f55e2947..5a368affa038 100644
--- a/samples/bpf/Makefile.target
+++ b/samples/bpf/Makefile.target
@@ -73,3 +73,14 @@ quiet_cmd_tprog-cobjs = CC $@
cmd_tprog-cobjs = $(CC) $(tprogc_flags) -c -o $@ $<
$(tprog-cobjs): $(obj)/%.o: $(src)/%.c FORCE
$(call if_changed_dep,tprog-cobjs)
+
+# Override includes for xdp_sample_user.o because $(srctree)/usr/include in
+# TPROGS_CFLAGS causes conflicts
+XDP_SAMPLE_CFLAGS += -Wall -O2 -lm \
+ -I./tools/include \
+ -I./tools/include/uapi \
+ -I./tools/lib \
+ -I./tools/testing/selftests/bpf
+$(obj)/xdp_sample_user.o: $(src)/xdp_sample_user.c \
+ $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h
+ $(CC) $(XDP_SAMPLE_CFLAGS) -c -o $@ $<
diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c
index cc3bce8d3aac..54958802c032 100644
--- a/samples/bpf/cookie_uid_helper_example.c
+++ b/samples/bpf/cookie_uid_helper_example.c
@@ -167,7 +167,7 @@ static void prog_load(void)
static void prog_attach_iptables(char *file)
{
int ret;
- char rules[100];
+ char rules[256];
if (bpf_obj_pin(prog_fd, file))
error(1, errno, "bpf_obj_pin");
@@ -175,8 +175,13 @@ static void prog_attach_iptables(char *file)
printf("file path too long: %s\n", file);
exit(1);
}
- sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
- file);
+ ret = snprintf(rules, sizeof(rules),
+ "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT",
+ file);
+ if (ret < 0 || ret >= sizeof(rules)) {
+ printf("error constructing iptables command\n");
+ exit(1);
+ }
ret = system(rules);
if (ret < 0) {
printf("iptables rule update failed: %d/n", WEXITSTATUS(ret));
diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index 14b792915a9c..4866afd054da 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -20,6 +20,7 @@
})
#define MINBLOCK_US 1
+#define MAX_ENTRIES 10000
struct key_t {
char waker[TASK_COMM_LEN];
@@ -32,14 +33,14 @@ struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, struct key_t);
__type(value, u64);
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} counts SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, u32);
__type(value, u64);
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} start SEC(".maps");
struct wokeby_t {
@@ -51,14 +52,14 @@ struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, u32);
__type(value, struct wokeby_t);
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} wokeby SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(key_size, sizeof(u32));
__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
- __uint(max_entries, 10000);
+ __uint(max_entries, MAX_ENTRIES);
} stackmap SEC(".maps");
#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index cea399424bca..566e6440e8c2 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -32,7 +32,7 @@ static void print_old_objects(int fd)
__u64 key, next_key;
struct pair v;
- key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
+ key = write(1, "\e[1;1H\e[2J", 11); /* clear screen */
key = -1;
while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
diff --git a/samples/bpf/xdp_monitor.bpf.c b/samples/bpf/xdp_monitor.bpf.c
new file mode 100644
index 000000000000..cfb41e2205f4
--- /dev/null
+++ b/samples/bpf/xdp_monitor.bpf.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * XDP monitor tool, based on tracepoints
+ */
+#include "xdp_sample.bpf.h"
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
deleted file mode 100644
index 5c955b812c47..000000000000
--- a/samples/bpf/xdp_monitor_kern.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc.
- *
- * XDP monitor tool, based on tracepoints
- */
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, u64);
- __uint(max_entries, 2);
- /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-#define XDP_UNKNOWN XDP_REDIRECT + 1
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, u64);
- __uint(max_entries, XDP_UNKNOWN + 1);
-} exception_cnt SEC(".maps");
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12 size:4; signed:0;
- int ifindex; // offset:16 size:4; signed:1;
- int err; // offset:20 size:4; signed:1;
- int to_ifindex; // offset:24 size:4; signed:1;
- u32 map_id; // offset:28 size:4; signed:0;
- int map_index; // offset:32 size:4; signed:1;
-}; // offset:36
-
-enum {
- XDP_REDIRECT_SUCCESS = 0,
- XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
-{
- u32 key = XDP_REDIRECT_ERROR;
- int err = ctx->err;
- u64 *cnt;
-
- if (!err)
- key = XDP_REDIRECT_SUCCESS;
-
- cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);
- if (!cnt)
- return 1;
- *cnt += 1;
-
- return 0; /* Indicate event was filtered (no further processing)*/
- /*
- * Returning 1 here would allow e.g. a perf-record tracepoint
- * to see and record these events, but it doesn't work well
- * in-practice as stopping perf-record also unload this
- * bpf_prog. Plus, there is additional overhead of doing so.
- */
-}
-
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect")
-int trace_xdp_redirect(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Likely unloaded when prog starts */
-SEC("tracepoint/xdp/xdp_redirect_map")
-int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
-{
- return xdp_redirect_collect_stat(ctx);
-}
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int ifindex; // offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
-{
- u64 *cnt;
- u32 key;
-
- key = ctx->act;
- if (key > XDP_REDIRECT)
- key = XDP_UNKNOWN;
-
- cnt = bpf_map_lookup_elem(&exception_cnt, &key);
- if (!cnt)
- return 1;
- *cnt += 1;
-
- return 0;
-}
-
-/* Common stats data record shared with _user.c */
-struct datarec {
- u64 processed;
- u64 dropped;
- u64 info;
- u64 err;
-};
-#define MAX_CPUS 64
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int to_cpu; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
- u32 to_cpu = ctx->to_cpu;
- struct datarec *rec;
-
- if (to_cpu >= MAX_CPUS)
- return 1;
-
- rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- if (ctx->processed > 0)
- rec->info += 1;
-
- return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Count times kthread yielded CPU via schedule call */
- if (ctx->sched)
- rec->info++;
-
- return 0;
-}
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} devmap_xmit_cnt SEC(".maps");
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct devmap_xmit_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int from_ifindex; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int to_ifindex; // offset:16; size:4; signed:1;
- int drops; // offset:20; size:4; signed:1;
- int sent; // offset:24; size:4; signed:1;
- int err; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_devmap_xmit")
-int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->sent;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- rec->info += 1;
-
- /* Record error cases, where no frame were sent */
- if (ctx->err)
- rec->err++;
-
- /* Catch API error of drv ndo_xdp_xmit sent more than count */
- if (ctx->drops < 0)
- rec->err++;
-
- return 1;
-}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index 49ebc49aefc3..fb9391a5ec62 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -1,15 +1,12 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
static const char *__doc__=
- "XDP monitor tool, based on tracepoints\n"
-;
+"XDP monitor tool, based on tracepoints\n";
static const char *__doc_err_only__=
- " NOTICE: Only tracking XDP redirect errors\n"
- " Enable TX success stats via '--stats'\n"
- " (which comes with a per packet processing overhead)\n"
-;
+" NOTICE: Only tracking XDP redirect errors\n"
+" Enable redirect success stats via '-s/--stats'\n"
+" (which comes with a per packet processing overhead)\n";
#include <errno.h>
#include <stdio.h>
@@ -20,768 +17,103 @@ static const char *__doc_err_only__=
#include <ctype.h>
#include <unistd.h>
#include <locale.h>
-
#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
-
#include <signal.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_monitor.skel.h"
-enum map_type {
- REDIRECT_ERR_CNT,
- EXCEPTION_CNT,
- CPUMAP_ENQUEUE_CNT,
- CPUMAP_KTHREAD_CNT,
- DEVMAP_XMIT_CNT,
-};
+static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT |
+ SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-static const char *const map_type_strings[] = {
- [REDIRECT_ERR_CNT] = "redirect_err_cnt",
- [EXCEPTION_CNT] = "exception_cnt",
- [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
- [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
- [DEVMAP_XMIT_CNT] = "devmap_xmit_cnt",
-};
-
-#define NUM_MAP 5
-#define NUM_TP 8
-
-static int tp_cnt;
-static int map_cnt;
-static int verbose = 1;
-static bool debug = false;
-struct bpf_map *map_data[NUM_MAP] = {};
-struct bpf_link *tp_links[NUM_TP] = {};
-struct bpf_object *obj;
+DEFINE_SAMPLE_INIT(xdp_monitor);
static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"debug", no_argument, NULL, 'D' },
- {"stats", no_argument, NULL, 'S' },
- {"sec", required_argument, NULL, 's' },
- {0, 0, NULL, 0 }
-};
-
-static void int_exit(int sig)
-{
- /* Detach tracepoints */
- while (tp_cnt)
- bpf_link__destroy(tp_links[--tp_cnt]);
-
- bpf_object__close(obj);
- exit(0);
-}
-
-/* C standard specifies two constants, EXIT_SUCCESS(0) and EXIT_FAILURE(1) */
-#define EXIT_FAIL_MEM 5
-
-static void usage(char *argv[])
-{
- int i;
- printf("\nDOCUMENTATION:\n%s\n", __doc__);
- printf("\n");
- printf(" Usage: %s (options-see-below)\n",
- argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-15s", long_options[i].name);
- if (long_options[i].flag != NULL)
- printf(" flag (internal value:%d)",
- *long_options[i].flag);
- else
- printf("short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n");
-}
-
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
- struct timespec t;
- int res;
-
- res = clock_gettime(CLOCK_MONOTONIC, &t);
- if (res < 0) {
- fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- exit(EXIT_FAILURE);
- }
- return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-enum {
- REDIR_SUCCESS = 0,
- REDIR_ERROR = 1,
-};
-#define REDIR_RES_MAX 2
-static const char *redir_names[REDIR_RES_MAX] = {
- [REDIR_SUCCESS] = "Success",
- [REDIR_ERROR] = "Error",
-};
-static const char *err2str(int err)
-{
- if (err < REDIR_RES_MAX)
- return redir_names[err];
- return NULL;
-}
-/* enum xdp_action */
-#define XDP_UNKNOWN XDP_REDIRECT + 1
-#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
-static const char *xdp_action_names[XDP_ACTION_MAX] = {
- [XDP_ABORTED] = "XDP_ABORTED",
- [XDP_DROP] = "XDP_DROP",
- [XDP_PASS] = "XDP_PASS",
- [XDP_TX] = "XDP_TX",
- [XDP_REDIRECT] = "XDP_REDIRECT",
- [XDP_UNKNOWN] = "XDP_UNKNOWN",
-};
-static const char *action2str(int action)
-{
- if (action < XDP_ACTION_MAX)
- return xdp_action_names[action];
- return NULL;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 info;
- __u64 err;
-};
-#define MAX_CPUS 64
-
-/* Userspace structs for collection of stats from maps */
-struct record {
- __u64 timestamp;
- struct datarec total;
- struct datarec *cpu;
+ { "help", no_argument, NULL, 'h' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
};
-struct u64rec {
- __u64 processed;
-};
-struct record_u64 {
- /* record for _kern side __u64 values */
- __u64 timestamp;
- struct u64rec total;
- struct u64rec *cpu;
-};
-
-struct stats_record {
- struct record_u64 xdp_redirect[REDIR_RES_MAX];
- struct record_u64 xdp_exception[XDP_ACTION_MAX];
- struct record xdp_cpumap_kthread;
- struct record xdp_cpumap_enqueue[MAX_CPUS];
- struct record xdp_devmap_xmit;
-};
-
-static bool map_collect_record(int fd, __u32 key, struct record *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec values[nr_cpus];
- __u64 sum_processed = 0;
- __u64 sum_dropped = 0;
- __u64 sum_info = 0;
- __u64 sum_err = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_processed += values[i].processed;
- rec->cpu[i].dropped = values[i].dropped;
- sum_dropped += values[i].dropped;
- rec->cpu[i].info = values[i].info;
- sum_info += values[i].info;
- rec->cpu[i].err = values[i].err;
- sum_err += values[i].err;
- }
- rec->total.processed = sum_processed;
- rec->total.dropped = sum_dropped;
- rec->total.info = sum_info;
- rec->total.err = sum_err;
- return true;
-}
-
-static bool map_collect_record_u64(int fd, __u32 key, struct record_u64 *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct u64rec values[nr_cpus];
- __u64 sum_total = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_total += values[i].processed;
- }
- rec->total.processed = sum_total;
- return true;
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static double calc_period_u64(struct record_u64 *r, struct record_u64 *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static double calc_pps(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->processed - p->processed;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_pps_u64(struct u64rec *r, struct u64rec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->processed - p->processed;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_drop(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->dropped - p->dropped;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_info(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->info - p->info;
- pps = packets / period;
- }
- return pps;
-}
-
-static double calc_err(struct datarec *r, struct datarec *p, double period)
-{
- __u64 packets = 0;
- double pps = 0;
-
- if (period > 0) {
- packets = r->err - p->err;
- pps = packets / period;
- }
- return pps;
-}
-
-static void stats_print(struct stats_record *stats_rec,
- struct stats_record *stats_prev,
- bool err_only)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- int rec_i = 0, i, to_cpu;
- double t = 0, pps = 0;
-
- /* Header */
- printf("%-15s %-7s %-12s %-12s %-9s\n",
- "XDP-event", "CPU:to", "pps", "drop-pps", "extra-info");
-
- /* tracepoint: xdp:xdp_redirect_* */
- if (err_only)
- rec_i = REDIR_ERROR;
-
- for (; rec_i < REDIR_RES_MAX; rec_i++) {
- struct record_u64 *rec, *prev;
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
- rec = &stats_rec->xdp_redirect[rec_i];
- prev = &stats_prev->xdp_redirect[rec_i];
- t = calc_period_u64(rec, prev);
-
- for (i = 0; i < nr_cpus; i++) {
- struct u64rec *r = &rec->cpu[i];
- struct u64rec *p = &prev->cpu[i];
-
- pps = calc_pps_u64(r, p, t);
- if (pps > 0)
- printf(fmt1, "XDP_REDIRECT", i,
- rec_i ? 0.0: pps, rec_i ? pps : 0.0,
- err2str(rec_i));
- }
- pps = calc_pps_u64(&rec->total, &prev->total, t);
- printf(fmt2, "XDP_REDIRECT", "total",
- rec_i ? 0.0: pps, rec_i ? pps : 0.0, err2str(rec_i));
- }
-
- /* tracepoint: xdp:xdp_exception */
- for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
- struct record_u64 *rec, *prev;
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %s\n";
-
- rec = &stats_rec->xdp_exception[rec_i];
- prev = &stats_prev->xdp_exception[rec_i];
- t = calc_period_u64(rec, prev);
-
- for (i = 0; i < nr_cpus; i++) {
- struct u64rec *r = &rec->cpu[i];
- struct u64rec *p = &prev->cpu[i];
-
- pps = calc_pps_u64(r, p, t);
- if (pps > 0)
- printf(fmt1, "Exception", i,
- 0.0, pps, action2str(rec_i));
- }
- pps = calc_pps_u64(&rec->total, &prev->total, t);
- if (pps > 0)
- printf(fmt2, "Exception", "total",
- 0.0, pps, action2str(rec_i));
- }
-
- /* cpumap enqueue stats */
- for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
- char *fmt1 = "%-15s %3d:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
- char *fmt2 = "%-15s %3s:%-3d %'-12.0f %'-12.0f %'-10.2f %s\n";
- struct record *rec, *prev;
- char *info_str = "";
- double drop, info;
-
- rec = &stats_rec->xdp_cpumap_enqueue[to_cpu];
- prev = &stats_prev->xdp_cpumap_enqueue[to_cpu];
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- if (info > 0) {
- info_str = "bulk-average";
- info = pps / info; /* calc average bulk size */
- }
- if (pps > 0)
- printf(fmt1, "cpumap-enqueue",
- i, to_cpu, pps, drop, info, info_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- if (pps > 0) {
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- if (info > 0) {
- info_str = "bulk-average";
- info = pps / info; /* calc average bulk size */
- }
- printf(fmt2, "cpumap-enqueue",
- "sum", to_cpu, pps, drop, info, info_str);
- }
- }
-
- /* cpumap kthread stats */
- {
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.0f %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.0f %s\n";
- struct record *rec, *prev;
- double drop, info;
- char *i_str = "";
-
- rec = &stats_rec->xdp_cpumap_kthread;
- prev = &stats_prev->xdp_cpumap_kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- if (info > 0)
- i_str = "sched";
- if (pps > 0 || drop > 0)
- printf(fmt1, "cpumap-kthread",
- i, pps, drop, info, i_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- if (info > 0)
- i_str = "sched-sum";
- printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
- }
-
- /* devmap ndo_xdp_xmit stats */
- {
- char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
- char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
- struct record *rec, *prev;
- double drop, info, err;
- char *i_str = "";
- char *err_str = "";
-
- rec = &stats_rec->xdp_devmap_xmit;
- prev = &stats_prev->xdp_devmap_xmit;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop(r, p, t);
- info = calc_info(r, p, t);
- err = calc_err(r, p, t);
- if (info > 0) {
- i_str = "bulk-average";
- info = (pps+drop) / info; /* calc avg bulk */
- }
- if (err > 0)
- err_str = "drv-err";
- if (pps > 0 || drop > 0)
- printf(fmt1, "devmap-xmit",
- i, pps, drop, info, i_str, err_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop(&rec->total, &prev->total, t);
- info = calc_info(&rec->total, &prev->total, t);
- err = calc_err(&rec->total, &prev->total, t);
- if (info > 0) {
- i_str = "bulk-average";
- info = (pps+drop) / info; /* calc avg bulk */
- }
- if (err > 0)
- err_str = "drv-err";
- printf(fmt2, "devmap-xmit", "total", pps, drop,
- info, i_str, err_str);
- }
-
- printf("\n");
-}
-
-static bool stats_collect(struct stats_record *rec)
-{
- int fd;
- int i;
-
- /* TODO: Detect if someone unloaded the perf event_fd's, as
- * this can happen by someone running perf-record -e
- */
-
- fd = bpf_map__fd(map_data[REDIRECT_ERR_CNT]);
- for (i = 0; i < REDIR_RES_MAX; i++)
- map_collect_record_u64(fd, i, &rec->xdp_redirect[i]);
-
- fd = bpf_map__fd(map_data[EXCEPTION_CNT]);
- for (i = 0; i < XDP_ACTION_MAX; i++) {
- map_collect_record_u64(fd, i, &rec->xdp_exception[i]);
- }
-
- fd = bpf_map__fd(map_data[CPUMAP_ENQUEUE_CNT]);
- for (i = 0; i < MAX_CPUS; i++)
- map_collect_record(fd, i, &rec->xdp_cpumap_enqueue[i]);
-
- fd = bpf_map__fd(map_data[CPUMAP_KTHREAD_CNT]);
- map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
-
- fd = bpf_map__fd(map_data[DEVMAP_XMIT_CNT]);
- map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
-
- return true;
-}
-
-static void *alloc_rec_per_cpu(int record_size)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- void *array;
-
- array = calloc(nr_cpus, record_size);
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- exit(EXIT_FAIL_MEM);
- }
- return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
- struct stats_record *rec;
- int rec_sz;
- int i;
-
- /* Alloc main stats_record structure */
- rec = calloc(1, sizeof(*rec));
- if (!rec) {
- fprintf(stderr, "Mem alloc error\n");
- exit(EXIT_FAIL_MEM);
- }
-
- /* Alloc stats stored per CPU for each record */
- rec_sz = sizeof(struct u64rec);
- for (i = 0; i < REDIR_RES_MAX; i++)
- rec->xdp_redirect[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- for (i = 0; i < XDP_ACTION_MAX; i++)
- rec->xdp_exception[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- rec_sz = sizeof(struct datarec);
- rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
- rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
-
- for (i = 0; i < MAX_CPUS; i++)
- rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
-
- return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
- int i;
-
- for (i = 0; i < REDIR_RES_MAX; i++)
- free(r->xdp_redirect[i].cpu);
-
- for (i = 0; i < XDP_ACTION_MAX; i++)
- free(r->xdp_exception[i].cpu);
-
- free(r->xdp_cpumap_kthread.cpu);
- free(r->xdp_devmap_xmit.cpu);
-
- for (i = 0; i < MAX_CPUS; i++)
- free(r->xdp_cpumap_enqueue[i].cpu);
-
- free(r);
-}
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
-{
- struct stats_record *tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-static void stats_poll(int interval, bool err_only)
-{
- struct stats_record *rec, *prev;
-
- rec = alloc_stats_record();
- prev = alloc_stats_record();
- stats_collect(rec);
-
- if (err_only)
- printf("\n%s\n", __doc_err_only__);
-
- /* Trick to pretty printf with thousands separators use %' */
- setlocale(LC_NUMERIC, "en_US");
-
- /* Header */
- if (verbose)
- printf("\n%s", __doc__);
-
- /* TODO Need more advanced stats on error types */
- if (verbose) {
- printf(" - Stats map0: %s\n", bpf_map__name(map_data[0]));
- printf(" - Stats map1: %s\n", bpf_map__name(map_data[1]));
- printf("\n");
- }
- fflush(stdout);
-
- while (1) {
- swap(&prev, &rec);
- stats_collect(rec);
- stats_print(rec, prev, err_only);
- fflush(stdout);
- sleep(interval);
- }
-
- free_stats_record(rec);
- free_stats_record(prev);
-}
-
-static void print_bpf_prog_info(void)
-{
- struct bpf_program *prog;
- struct bpf_map *map;
- int i = 0;
-
- /* Prog info */
- printf("Loaded BPF prog have %d bpf program(s)\n", tp_cnt);
- bpf_object__for_each_program(prog, obj) {
- printf(" - prog_fd[%d] = fd(%d)\n", i, bpf_program__fd(prog));
- i++;
- }
-
- i = 0;
- /* Maps info */
- printf("Loaded BPF prog have %d map(s)\n", map_cnt);
- bpf_object__for_each_map(map, obj) {
- const char *name = bpf_map__name(map);
- int fd = bpf_map__fd(map);
-
- printf(" - map_data[%d] = fd(%d) name:%s\n", i, fd, name);
- i++;
- }
-
- /* Event info */
- printf("Searching for (max:%d) event file descriptor(s)\n", tp_cnt);
- for (i = 0; i < tp_cnt; i++) {
- int fd = bpf_link__fd(tp_links[i]);
-
- if (fd != -1)
- printf(" - event_fd[%d] = fd(%d)\n", i, fd);
- }
-}
int main(int argc, char **argv)
{
- struct bpf_program *prog;
- int longindex = 0, opt;
- int ret = EXIT_FAILURE;
- enum map_type type;
- char filename[256];
-
- /* Default settings: */
+ unsigned long interval = 2;
+ int ret = EXIT_FAIL_OPTION;
+ struct xdp_monitor *skel;
bool errors_only = true;
- int interval = 2;
+ int longindex = 0, opt;
+ bool error = true;
/* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hDSs:",
+ while ((opt = getopt_long(argc, argv, "si:vh",
long_options, &longindex)) != -1) {
switch (opt) {
- case 'D':
- debug = true;
- break;
- case 'S':
+ case 's':
errors_only = false;
+ mask |= SAMPLE_REDIRECT_CNT;
break;
- case 's':
- interval = atoi(optarg);
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
break;
case 'h':
+ error = false;
default:
- usage(argv);
+ sample_usage(argv, long_options, __doc__, mask, error);
return ret;
}
}
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- /* Remove tracepoint program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- obj = bpf_object__open_file(filename, NULL);
- if (libbpf_get_error(obj)) {
- printf("ERROR: opening BPF object file failed\n");
- obj = NULL;
- goto cleanup;
- }
-
- /* load BPF program */
- if (bpf_object__load(obj)) {
- printf("ERROR: loading BPF object file failed\n");
- goto cleanup;
+ skel = xdp_monitor__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_monitor__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- for (type = 0; type < NUM_MAP; type++) {
- map_data[type] =
- bpf_object__find_map_by_name(obj, map_type_strings[type]);
-
- if (libbpf_get_error(map_data[type])) {
- printf("ERROR: finding a map in obj file failed\n");
- goto cleanup;
- }
- map_cnt++;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- bpf_object__for_each_program(prog, obj) {
- tp_links[tp_cnt] = bpf_program__attach(prog);
- if (libbpf_get_error(tp_links[tp_cnt])) {
- printf("ERROR: bpf_program__attach failed\n");
- tp_links[tp_cnt] = NULL;
- goto cleanup;
- }
- tp_cnt++;
+ ret = xdp_monitor__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (debug) {
- print_bpf_prog_info();
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- /* Unload/stop tracepoint event by closing bpf_link's */
- if (errors_only) {
- /* The bpf_link[i] depend on the order of
- * the functions was defined in _kern.c
- */
- bpf_link__destroy(tp_links[2]); /* tracepoint/xdp/xdp_redirect */
- tp_links[2] = NULL;
+ if (errors_only)
+ printf("%s", __doc_err_only__);
- bpf_link__destroy(tp_links[3]); /* tracepoint/xdp/xdp_redirect_map */
- tp_links[3] = NULL;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
-
- stats_poll(interval, errors_only);
-
- ret = EXIT_SUCCESS;
-
-cleanup:
- /* Detach tracepoints */
- while (tp_cnt)
- bpf_link__destroy(tp_links[--tp_cnt]);
-
- bpf_object__close(obj);
- return ret;
+ ret = EXIT_OK;
+end_destroy:
+ xdp_monitor__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect.bpf.c b/samples/bpf/xdp_redirect.bpf.c
new file mode 100644
index 000000000000..7c02bacfe96b
--- /dev/null
+++ b/samples/bpf/xdp_redirect.bpf.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+const volatile int ifindex_out;
+
+SEC("xdp")
+int xdp_redirect_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ swap_src_dst_mac(data);
+ return bpf_redirect(ifindex_out, 0);
+}
+
+/* Redirect require an XDP bpf_prog loaded on the TX device */
+SEC("xdp")
+int xdp_redirect_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu.bpf.c
index 8255025dea97..f10fe3cf25f6 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu.bpf.c
@@ -2,74 +2,18 @@
*
* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/if_vlan.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/udp.h>
-
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
#include "hash_func01.h"
-#define MAX_CPUS NR_CPUS
-
/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
__uint(type, BPF_MAP_TYPE_CPUMAP);
__uint(key_size, sizeof(u32));
__uint(value_size, sizeof(struct bpf_cpumap_val));
- __uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
-/* Common stats data record to keep userspace more simple */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 issue;
- __u64 xdp_pass;
- __u64 xdp_drop;
- __u64 xdp_redirect;
-};
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} rx_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 2);
- /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
- __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
/* Set of maps controlling available CPU, and for iterating through
* selectable redirect CPUs.
*/
@@ -77,14 +21,15 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, u32);
- __uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(key, u32);
__type(value, u32);
__uint(max_entries, 1);
} cpus_count SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__type(key, u32);
@@ -92,24 +37,16 @@ struct {
__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
-/* Used by trace point */
struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, struct datarec);
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
__uint(max_entries, 1);
-} exception_cnt SEC(".maps");
+} tx_port SEC(".maps");
-/* Helper parse functions */
+char tx_mac_addr[ETH_ALEN];
-/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
- *
- * Returns false on error and non-supported ether-type
- */
-struct vlan_hdr {
- __be16 h_vlan_TCI;
- __be16 h_vlan_encapsulated_proto;
-};
+/* Helper parse functions */
static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
@@ -125,11 +62,12 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = eth->h_proto;
/* Skip non 802.3 Ethertypes */
- if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
+ if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
return false;
/* Handle VLAN tagged packet */
- if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
struct vlan_hdr *vlan_hdr;
vlan_hdr = (void *)eth + offset;
@@ -139,7 +77,8 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
/* Handle double VLAN tagged packet */
- if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
struct vlan_hdr *vlan_hdr;
vlan_hdr = (void *)eth + offset;
@@ -149,7 +88,7 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
}
- *eth_proto = ntohs(eth_type);
+ *eth_proto = bpf_ntohs(eth_type);
*l3_offset = offset;
return true;
}
@@ -172,7 +111,7 @@ u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
if (udph + 1 > data_end)
return 0;
- dport = ntohs(udph->dest);
+ dport = bpf_ntohs(udph->dest);
return dport;
}
@@ -200,50 +139,48 @@ int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
return ip6h->nexthdr;
}
-SEC("xdp_cpu_map0")
+SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct datarec *rec;
u32 *cpu_selected;
- u32 cpu_dest;
- u32 key = 0;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
/* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
if (!cpu_selected)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map1_touch_data")
+SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
struct datarec *rec;
u32 *cpu_selected;
- u32 cpu_dest;
+ u32 cpu_dest = 0;
+ u32 key0 = 0;
u16 eth_type;
- u32 key = 0;
/* Only use first entry in cpus_available */
- cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
if (!cpu_selected)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
@@ -252,36 +189,33 @@ int xdp_prognum1_touch_data(struct xdp_md *ctx)
if (eth + 1 > data_end)
return XDP_ABORTED;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
/* Read packet data, and use it (drop non 802.3 Ethertypes) */
eth_type = eth->h_proto;
- if (ntohs(eth_type) < ETH_P_802_3_MIN) {
- rec->dropped++;
+ if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
+ NO_TEAR_INC(rec->dropped);
return XDP_DROP;
}
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map2_round_robin")
+SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
+ u32 key = bpf_get_smp_processor_id();
struct datarec *rec;
- u32 cpu_dest;
- u32 *cpu_lookup;
+ u32 cpu_dest = 0;
u32 key0 = 0;
u32 *cpu_selected;
@@ -307,40 +241,37 @@ int xdp_prognum2_round_robin(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_selected;
- /* Count RX packet in map */
- rec = bpf_map_lookup_elem(&rx_cnt, &key0);
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map3_proto_separate")
+SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
- u32 cpu_idx = 0;
u32 *cpu_lookup;
- u32 key = 0;
+ u32 cpu_idx = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
return XDP_PASS; /* Just skip */
@@ -381,35 +312,33 @@ int xdp_prognum3_proto_separate(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-SEC("xdp_cpu_map4_ddos_filter_pktgen")
+SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
+ u32 *cpu_lookup;
u32 cpu_idx = 0;
u16 dest_port;
- u32 *cpu_lookup;
- u32 key = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
return XDP_PASS; /* Just skip */
@@ -443,8 +372,7 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
/* DDoS filter UDP port 9 (pktgen) */
dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
if (dest_port == 9) {
- if (rec)
- rec->dropped++;
+ NO_TEAR_INC(rec->dropped);
return XDP_DROP;
}
break;
@@ -457,11 +385,10 @@ int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
@@ -496,10 +423,10 @@ u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
if (ip6h + 1 > data_end)
return 0;
- cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
- cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
- cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
- cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
+ cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
return cpu_hash;
@@ -509,30 +436,29 @@ u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
* hashing scheme is symmetric, meaning swapping IP src/dest still hit
* same CPU.
*/
-SEC("xdp_cpu_map5_lb_hash_ip_pairs")
+SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
- u8 ip_proto = IPPROTO_UDP;
struct datarec *rec;
u16 eth_proto = 0;
u64 l3_offset = 0;
u32 cpu_dest = 0;
u32 cpu_idx = 0;
u32 *cpu_lookup;
+ u32 key0 = 0;
u32 *cpu_max;
u32 cpu_hash;
- u32 key = 0;
- /* Count RX packet in map */
rec = bpf_map_lookup_elem(&rx_cnt, &key);
if (!rec)
- return XDP_ABORTED;
- rec->processed++;
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
if (!cpu_max)
return XDP_ABORTED;
@@ -560,171 +486,56 @@ int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
return XDP_ABORTED;
cpu_dest = *cpu_lookup;
- if (cpu_dest >= MAX_CPUS) {
- rec->issue++;
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
return XDP_ABORTED;
}
-
return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
-char _license[] SEC("license") = "GPL";
-
-/*** Trace point code ***/
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12 size:4; signed:0;
- int ifindex; // offset:16 size:4; signed:1;
- int err; // offset:20 size:4; signed:1;
- int to_ifindex; // offset:24 size:4; signed:1;
- u32 map_id; // offset:28 size:4; signed:0;
- int map_index; // offset:32 size:4; signed:1;
-}; // offset:36
-
-enum {
- XDP_REDIRECT_SUCCESS = 0,
- XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+SEC("xdp_cpumap/redirect")
+int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
- u32 key = XDP_REDIRECT_ERROR;
- struct datarec *rec;
- int err = ctx->err;
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
- if (!err)
- key = XDP_REDIRECT_SUCCESS;
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
- rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
- if (!rec)
- return 0;
- rec->dropped += 1;
-
- return 0; /* Indicate event was filtered (no further processing)*/
- /*
- * Returning 1 here would allow e.g. a perf-record tracepoint
- * to see and record these events, but it doesn't work well
- * in-practice as stopping perf-record also unload this
- * bpf_prog. Plus, there is additional overhead of doing so.
- */
+ swap_src_dst_mac(data);
+ return bpf_redirect_map(&tx_port, 0, 0);
}
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+SEC("xdp_cpumap/pass")
+int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
- return xdp_redirect_collect_stat(ctx);
+ return XDP_PASS;
}
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+SEC("xdp_cpumap/drop")
+int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
- return xdp_redirect_collect_stat(ctx);
+ return XDP_DROP;
}
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int ifindex; // offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
+SEC("xdp_devmap/egress")
+int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&exception_cnt, &key);
- if (!rec)
- return 1;
- rec->dropped += 1;
-
- return 0;
-}
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u64 nh_off;
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
- u64 __pad; // First 8 bytes are not accessible by bpf code
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int to_cpu; // offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
- u32 to_cpu = ctx->to_cpu;
- struct datarec *rec;
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
- if (to_cpu >= MAX_CPUS)
- return 1;
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
- rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
-
- /* Record bulk events, then userspace can calc average bulk size */
- if (ctx->processed > 0)
- rec->issue += 1;
-
- /* Inception: It's possible to detect overload situations, via
- * this tracepoint. This can be used for creating a feedback
- * loop to XDP, which can take appropriate actions to mitigate
- * this overload situation.
- */
- return 0;
+ return XDP_PASS;
}
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in: kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
- u64 __pad; // First 8 bytes are not accessible
- int map_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12; size:4; signed:0;
- int cpu; // offset:16; size:4; signed:1;
- unsigned int drops; // offset:20; size:4; signed:0;
- unsigned int processed; // offset:24; size:4; signed:0;
- int sched; // offset:28; size:4; signed:1;
- unsigned int xdp_pass; // offset:32; size:4; signed:0;
- unsigned int xdp_drop; // offset:36; size:4; signed:0;
- unsigned int xdp_redirect; // offset:40; size:4; signed:0;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
- struct datarec *rec;
- u32 key = 0;
-
- rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
- if (!rec)
- return 0;
- rec->processed += ctx->processed;
- rec->dropped += ctx->drops;
- rec->xdp_pass += ctx->xdp_pass;
- rec->xdp_drop += ctx->xdp_drop;
- rec->xdp_redirect += ctx->xdp_redirect;
-
- /* Count times kthread yielded CPU via schedule call */
- if (ctx->sched)
- rec->issue++;
-
- return 0;
-}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 9e225c96b77e..6e25fba64c72 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -2,7 +2,16 @@
/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
static const char *__doc__ =
- " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
+"XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n"
+"Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n"
+"Valid specification for CPUMAP BPF program:\n"
+" --mprog-name/-e pass (use built-in XDP_PASS program)\n"
+" --mprog-name/-e drop (use built-in XDP_DROP program)\n"
+" --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n"
+" Custom CPUMAP BPF program:\n"
+" --mprog-filename/-f <filename> --mprog-name/-e <program>\n"
+" Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n"
+" to configure DEVMAP in BPF object <filename>\n";
#include <errno.h>
#include <signal.h>
@@ -18,558 +27,62 @@ static const char *__doc__ =
#include <net/if.h>
#include <time.h>
#include <linux/limits.h>
-
#include <arpa/inet.h>
#include <linux/if_link.h>
-
-/* How many xdp_progs are defined in _kern.c */
-#define MAX_PROG 6
-
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-
#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_cpu.skel.h"
-static int ifindex = -1;
-static char ifname_buf[IF_NAMESIZE];
-static char *ifname;
-static __u32 prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int n_cpus;
-
-enum map_type {
- CPU_MAP,
- RX_CNT,
- REDIRECT_ERR_CNT,
- CPUMAP_ENQUEUE_CNT,
- CPUMAP_KTHREAD_CNT,
- CPUS_AVAILABLE,
- CPUS_COUNT,
- CPUS_ITERATOR,
- EXCEPTION_CNT,
-};
+static int map_fd;
+static int avail_fd;
+static int count_fd;
-static const char *const map_type_strings[] = {
- [CPU_MAP] = "cpu_map",
- [RX_CNT] = "rx_cnt",
- [REDIRECT_ERR_CNT] = "redirect_err_cnt",
- [CPUMAP_ENQUEUE_CNT] = "cpumap_enqueue_cnt",
- [CPUMAP_KTHREAD_CNT] = "cpumap_kthread_cnt",
- [CPUS_AVAILABLE] = "cpus_available",
- [CPUS_COUNT] = "cpus_count",
- [CPUS_ITERATOR] = "cpus_iterator",
- [EXCEPTION_CNT] = "exception_cnt",
-};
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
+ SAMPLE_EXCEPTION_CNT;
-#define NUM_TP 5
-#define NUM_MAP 9
-struct bpf_link *tp_links[NUM_TP] = {};
-static int map_fds[NUM_MAP];
-static int tp_cnt = 0;
-
-/* Exit return codes */
-#define EXIT_OK 0
-#define EXIT_FAIL 1
-#define EXIT_FAIL_OPTION 2
-#define EXIT_FAIL_XDP 3
-#define EXIT_FAIL_BPF 4
-#define EXIT_FAIL_MEM 5
+DEFINE_SAMPLE_INIT(xdp_redirect_cpu);
static const struct option long_options[] = {
- {"help", no_argument, NULL, 'h' },
- {"dev", required_argument, NULL, 'd' },
- {"skb-mode", no_argument, NULL, 'S' },
- {"sec", required_argument, NULL, 's' },
- {"progname", required_argument, NULL, 'p' },
- {"qsize", required_argument, NULL, 'q' },
- {"cpu", required_argument, NULL, 'c' },
- {"stress-mode", no_argument, NULL, 'x' },
- {"no-separators", no_argument, NULL, 'z' },
- {"force", no_argument, NULL, 'F' },
- {"mprog-disable", no_argument, NULL, 'n' },
- {"mprog-name", required_argument, NULL, 'e' },
- {"mprog-filename", required_argument, NULL, 'f' },
- {"redirect-device", required_argument, NULL, 'r' },
- {"redirect-map", required_argument, NULL, 'm' },
- {0, 0, NULL, 0 }
+ { "help", no_argument, NULL, 'h' },
+ { "dev", required_argument, NULL, 'd' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "progname", required_argument, NULL, 'p' },
+ { "qsize", required_argument, NULL, 'q' },
+ { "cpu", required_argument, NULL, 'c' },
+ { "stress-mode", no_argument, NULL, 'x' },
+ { "force", no_argument, NULL, 'F' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "stats", no_argument, NULL, 's' },
+ { "mprog-name", required_argument, NULL, 'e' },
+ { "mprog-filename", required_argument, NULL, 'f' },
+ { "redirect-device", required_argument, NULL, 'r' },
+ { "redirect-map", required_argument, NULL, 'm' },
+ {}
};
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (ifindex > -1) {
- if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(EXIT_FAIL);
- }
- if (prog_id == curr_prog_id) {
- fprintf(stderr,
- "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
- ifindex, ifname);
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
- } else if (!curr_prog_id) {
- printf("couldn't find a prog id on a given iface\n");
- } else {
- printf("program on interface changed, not removing\n");
- }
- }
- /* Detach tracepoints */
- while (tp_cnt)
- bpf_link__destroy(tp_links[--tp_cnt]);
-
- exit(EXIT_OK);
-}
-
static void print_avail_progs(struct bpf_object *obj)
{
struct bpf_program *pos;
+ printf(" Programs to be used for -p/--progname:\n");
bpf_object__for_each_program(pos, obj) {
- if (bpf_program__is_xdp(pos))
- printf(" %s\n", bpf_program__section_name(pos));
- }
-}
-
-static void usage(char *argv[], struct bpf_object *obj)
-{
- int i;
-
- printf("\nDOCUMENTATION:\n%s\n", __doc__);
- printf("\n");
- printf(" Usage: %s (options-see-below)\n", argv[0]);
- printf(" Listing options:\n");
- for (i = 0; long_options[i].name != 0; i++) {
- printf(" --%-12s", long_options[i].name);
- if (long_options[i].flag != NULL)
- printf(" flag (internal value:%d)",
- *long_options[i].flag);
- else
- printf(" short-option: -%c",
- long_options[i].val);
- printf("\n");
- }
- printf("\n Programs to be used for --progname:\n");
- print_avail_progs(obj);
- printf("\n");
-}
-
-/* gettime returns the current time of day in nanoseconds.
- * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
- * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE)
- */
-#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-static __u64 gettime(void)
-{
- struct timespec t;
- int res;
-
- res = clock_gettime(CLOCK_MONOTONIC, &t);
- if (res < 0) {
- fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
- exit(EXIT_FAIL);
- }
- return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
-}
-
-/* Common stats data record shared with _kern.c */
-struct datarec {
- __u64 processed;
- __u64 dropped;
- __u64 issue;
- __u64 xdp_pass;
- __u64 xdp_drop;
- __u64 xdp_redirect;
-};
-struct record {
- __u64 timestamp;
- struct datarec total;
- struct datarec *cpu;
-};
-struct stats_record {
- struct record rx_cnt;
- struct record redir_err;
- struct record kthread;
- struct record exception;
- struct record enq[];
-};
-
-static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
-{
- /* For percpu maps, userspace gets a value per possible CPU */
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec values[nr_cpus];
- __u64 sum_xdp_redirect = 0;
- __u64 sum_xdp_pass = 0;
- __u64 sum_xdp_drop = 0;
- __u64 sum_processed = 0;
- __u64 sum_dropped = 0;
- __u64 sum_issue = 0;
- int i;
-
- if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
- fprintf(stderr,
- "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
- return false;
- }
- /* Get time as close as possible to reading map contents */
- rec->timestamp = gettime();
-
- /* Record and sum values from each CPU */
- for (i = 0; i < nr_cpus; i++) {
- rec->cpu[i].processed = values[i].processed;
- sum_processed += values[i].processed;
- rec->cpu[i].dropped = values[i].dropped;
- sum_dropped += values[i].dropped;
- rec->cpu[i].issue = values[i].issue;
- sum_issue += values[i].issue;
- rec->cpu[i].xdp_pass = values[i].xdp_pass;
- sum_xdp_pass += values[i].xdp_pass;
- rec->cpu[i].xdp_drop = values[i].xdp_drop;
- sum_xdp_drop += values[i].xdp_drop;
- rec->cpu[i].xdp_redirect = values[i].xdp_redirect;
- sum_xdp_redirect += values[i].xdp_redirect;
- }
- rec->total.processed = sum_processed;
- rec->total.dropped = sum_dropped;
- rec->total.issue = sum_issue;
- rec->total.xdp_pass = sum_xdp_pass;
- rec->total.xdp_drop = sum_xdp_drop;
- rec->total.xdp_redirect = sum_xdp_redirect;
- return true;
-}
-
-static struct datarec *alloc_record_per_cpu(void)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- struct datarec *array;
-
- array = calloc(nr_cpus, sizeof(struct datarec));
- if (!array) {
- fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
- exit(EXIT_FAIL_MEM);
- }
- return array;
-}
-
-static struct stats_record *alloc_stats_record(void)
-{
- struct stats_record *rec;
- int i, size;
-
- size = sizeof(*rec) + n_cpus * sizeof(struct record);
- rec = malloc(size);
- if (!rec) {
- fprintf(stderr, "Mem alloc error\n");
- exit(EXIT_FAIL_MEM);
- }
- memset(rec, 0, size);
- rec->rx_cnt.cpu = alloc_record_per_cpu();
- rec->redir_err.cpu = alloc_record_per_cpu();
- rec->kthread.cpu = alloc_record_per_cpu();
- rec->exception.cpu = alloc_record_per_cpu();
- for (i = 0; i < n_cpus; i++)
- rec->enq[i].cpu = alloc_record_per_cpu();
-
- return rec;
-}
-
-static void free_stats_record(struct stats_record *r)
-{
- int i;
-
- for (i = 0; i < n_cpus; i++)
- free(r->enq[i].cpu);
- free(r->exception.cpu);
- free(r->kthread.cpu);
- free(r->redir_err.cpu);
- free(r->rx_cnt.cpu);
- free(r);
-}
-
-static double calc_period(struct record *r, struct record *p)
-{
- double period_ = 0;
- __u64 period = 0;
-
- period = r->timestamp - p->timestamp;
- if (period > 0)
- period_ = ((double) period / NANOSEC_PER_SEC);
-
- return period_;
-}
-
-static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->processed - p->processed;
- pps = packets / period_;
- }
- return pps;
-}
-
-static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->dropped - p->dropped;
- pps = packets / period_;
- }
- return pps;
-}
-
-static __u64 calc_errs_pps(struct datarec *r,
- struct datarec *p, double period_)
-{
- __u64 packets = 0;
- __u64 pps = 0;
-
- if (period_ > 0) {
- packets = r->issue - p->issue;
- pps = packets / period_;
- }
- return pps;
-}
-
-static void calc_xdp_pps(struct datarec *r, struct datarec *p,
- double *xdp_pass, double *xdp_drop,
- double *xdp_redirect, double period_)
-{
- *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
- if (period_ > 0) {
- *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
- *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
- *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
- }
-}
-
-static void stats_print(struct stats_record *stats_rec,
- struct stats_record *stats_prev,
- char *prog_name, char *mprog_name, int mprog_fd)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- double pps = 0, drop = 0, err = 0;
- bool mprog_enabled = false;
- struct record *rec, *prev;
- int to_cpu;
- double t;
- int i;
-
- if (mprog_fd > 0)
- mprog_enabled = true;
-
- /* Header */
- printf("Running XDP/eBPF prog_name:%s\n", prog_name);
- printf("%-15s %-7s %-14s %-11s %-9s\n",
- "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
-
- /* XDP rx_cnt */
- {
- char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
- char *errstr = "";
-
- rec = &stats_rec->rx_cnt;
- prev = &stats_prev->rx_cnt;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0)
- errstr = "cpu-dest/err";
- if (pps > 0)
- printf(fmt_rx, "XDP-RX",
- i, pps, drop, err, errstr);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- printf(fm2_rx, "XDP-RX", "total", pps, drop);
- }
-
- /* cpumap enqueue stats */
- for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) {
- char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
- char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
- char *errstr = "";
-
- rec = &stats_rec->enq[to_cpu];
- prev = &stats_prev->enq[to_cpu];
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0) {
- errstr = "bulk-average";
- err = pps / err; /* calc average bulk size */
- }
- if (pps > 0)
- printf(fmt, "cpumap-enqueue",
- i, to_cpu, pps, drop, err, errstr);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- if (pps > 0) {
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- if (err > 0) {
- errstr = "bulk-average";
- err = pps / err; /* calc average bulk size */
- }
- printf(fm2, "cpumap-enqueue",
- "sum", to_cpu, pps, drop, err, errstr);
- }
- }
-
- /* cpumap kthread stats */
- {
- char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
- char *e_str = "";
-
- rec = &stats_rec->kthread;
- prev = &stats_prev->kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- err = calc_errs_pps(r, p, t);
- if (err > 0)
- e_str = "sched";
- if (pps > 0)
- printf(fmt_k, "cpumap_kthread",
- i, pps, drop, err, e_str);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- err = calc_errs_pps(&rec->total, &prev->total, t);
- if (err > 0)
- e_str = "sched-sum";
- printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
- }
-
- /* XDP redirect err tracepoints (very unlikely) */
- {
- char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
- char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
- rec = &stats_rec->redir_err;
- prev = &stats_prev->redir_err;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- if (pps > 0)
- printf(fmt_err, "redirect_err", i, pps, drop);
+ if (bpf_program__is_xdp(pos)) {
+ if (!strncmp(bpf_program__name(pos), "xdp_prognum",
+ sizeof("xdp_prognum") - 1))
+ printf(" %s\n", bpf_program__name(pos));
}
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- printf(fm2_err, "redirect_err", "total", pps, drop);
}
-
- /* XDP general exception tracepoints */
- {
- char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
- char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
-
- rec = &stats_rec->exception;
- prev = &stats_prev->exception;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- pps = calc_pps(r, p, t);
- drop = calc_drop_pps(r, p, t);
- if (pps > 0)
- printf(fmt_err, "xdp_exception", i, pps, drop);
- }
- pps = calc_pps(&rec->total, &prev->total, t);
- drop = calc_drop_pps(&rec->total, &prev->total, t);
- printf(fm2_err, "xdp_exception", "total", pps, drop);
- }
-
- /* CPUMAP attached XDP program that runs on remote/destination CPU */
- if (mprog_enabled) {
- char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f\n";
- char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f\n";
- double xdp_pass, xdp_drop, xdp_redirect;
-
- printf("\n2nd remote XDP/eBPF prog_name: %s\n", mprog_name);
- printf("%-15s %-7s %-14s %-11s %-9s\n",
- "XDP-cpumap", "CPU:to", "xdp-pass", "xdp-drop", "xdp-redir");
-
- rec = &stats_rec->kthread;
- prev = &stats_prev->kthread;
- t = calc_period(rec, prev);
- for (i = 0; i < nr_cpus; i++) {
- struct datarec *r = &rec->cpu[i];
- struct datarec *p = &prev->cpu[i];
-
- calc_xdp_pps(r, p, &xdp_pass, &xdp_drop,
- &xdp_redirect, t);
- if (xdp_pass > 0 || xdp_drop > 0 || xdp_redirect > 0)
- printf(fmt_k, "xdp-in-kthread", i, xdp_pass, xdp_drop,
- xdp_redirect);
- }
- calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
- &xdp_redirect, t);
- printf(fm2_k, "xdp-in-kthread", "total", xdp_pass, xdp_drop, xdp_redirect);
- }
-
- printf("\n");
- fflush(stdout);
-}
-
-static void stats_collect(struct stats_record *rec)
-{
- int fd, i;
-
- fd = map_fds[RX_CNT];
- map_collect_percpu(fd, 0, &rec->rx_cnt);
-
- fd = map_fds[REDIRECT_ERR_CNT];
- map_collect_percpu(fd, 1, &rec->redir_err);
-
- fd = map_fds[CPUMAP_ENQUEUE_CNT];
- for (i = 0; i < n_cpus; i++)
- map_collect_percpu(fd, i, &rec->enq[i]);
-
- fd = map_fds[CPUMAP_KTHREAD_CNT];
- map_collect_percpu(fd, 0, &rec->kthread);
-
- fd = map_fds[EXCEPTION_CNT];
- map_collect_percpu(fd, 0, &rec->exception);
}
-
-/* Pointer swap trick */
-static inline void swap(struct stats_record **a, struct stats_record **b)
+static void usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error, struct bpf_object *obj)
{
- struct stats_record *tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
+ sample_usage(argv, long_options, doc, mask, error);
+ print_avail_progs(obj);
}
static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
@@ -582,39 +95,41 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
/* Add a CPU entry to cpumap, as this allocate a cpu entry in
* the kernel for the cpu.
*/
- ret = bpf_map_update_elem(map_fds[CPU_MAP], &cpu, value, 0);
- if (ret) {
- fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno));
+ return ret;
}
/* Inform bpf_prog's that a new CPU is available to select
* from via some control maps.
*/
- ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &avail_idx, &cpu, 0);
- if (ret) {
- fprintf(stderr, "Add to avail CPUs failed\n");
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno));
+ return ret;
}
/* When not replacing/updating existing entry, bump the count */
- ret = bpf_map_lookup_elem(map_fds[CPUS_COUNT], &key, &curr_cpus_count);
- if (ret) {
- fprintf(stderr, "Failed reading curr cpus_count\n");
- exit(EXIT_FAIL_BPF);
+ ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
+ if (ret < 0) {
+ fprintf(stderr, "Failed reading curr cpus_count: %s\n",
+ strerror(errno));
+ return ret;
}
if (new) {
curr_cpus_count++;
- ret = bpf_map_update_elem(map_fds[CPUS_COUNT], &key,
+ ret = bpf_map_update_elem(count_fd, &key,
&curr_cpus_count, 0);
- if (ret) {
- fprintf(stderr, "Failed write curr cpus_count\n");
- exit(EXIT_FAIL_BPF);
+ if (ret < 0) {
+ fprintf(stderr, "Failed write curr cpus_count: %s\n",
+ strerror(errno));
+ return ret;
}
}
- /* map_fd[7] = cpus_iterator */
- printf("%s CPU:%u as idx:%u qsize:%d prog_fd: %d (cpus_count:%u)\n",
- new ? "Add-new":"Replace", cpu, avail_idx,
+
+ printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n",
+ new ? "Add new" : "Replace", cpu, avail_idx,
value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
@@ -623,24 +138,29 @@ static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
/* CPUs are zero-indexed. Thus, add a special sentinel default value
* in map cpus_available to mark CPU index'es not configured
*/
-static void mark_cpus_unavailable(void)
+static int mark_cpus_unavailable(void)
{
+ int ret, i, n_cpus = libbpf_num_possible_cpus();
__u32 invalid_cpu = n_cpus;
- int ret, i;
for (i = 0; i < n_cpus; i++) {
- ret = bpf_map_update_elem(map_fds[CPUS_AVAILABLE], &i,
+ ret = bpf_map_update_elem(avail_fd, &i,
&invalid_cpu, 0);
- if (ret) {
- fprintf(stderr, "Failed marking CPU unavailable\n");
- exit(EXIT_FAIL_BPF);
+ if (ret < 0) {
+ fprintf(stderr, "Failed marking CPU unavailable: %s\n",
+ strerror(errno));
+ return ret;
}
}
+
+ return 0;
}
/* Stress cpumap management code by concurrently changing underlying cpumap */
-static void stress_cpumap(struct bpf_cpumap_val *value)
+static void stress_cpumap(void *ctx)
{
+ struct bpf_cpumap_val *value = ctx;
+
/* Changing qsize will cause kernel to free and alloc a new
* bpf_cpu_map_entry, with an associated/complicated tear-down
* procedure.
@@ -653,144 +173,163 @@ static void stress_cpumap(struct bpf_cpumap_val *value)
create_cpu_entry(1, value, 0, false);
}
-static void stats_poll(int interval, bool use_separators, char *prog_name,
- char *mprog_name, struct bpf_cpumap_val *value,
- bool stress_mode)
-{
- struct stats_record *record, *prev;
- int mprog_fd;
-
- record = alloc_stats_record();
- prev = alloc_stats_record();
- stats_collect(record);
-
- /* Trick to pretty printf with thousands separators use %' */
- if (use_separators)
- setlocale(LC_NUMERIC, "en_US");
-
- while (1) {
- swap(&prev, &record);
- mprog_fd = value->bpf_prog.fd;
- stats_collect(record);
- stats_print(record, prev, prog_name, mprog_name, mprog_fd);
- sleep(interval);
- if (stress_mode)
- stress_cpumap(value);
- }
-
- free_stats_record(record);
- free_stats_record(prev);
-}
-
-static int init_tracepoints(struct bpf_object *obj)
+static int set_cpumap_prog(struct xdp_redirect_cpu *skel,
+ const char *redir_interface, const char *redir_map,
+ const char *mprog_filename, const char *mprog_name)
{
- struct bpf_program *prog;
-
- bpf_object__for_each_program(prog, obj) {
- if (bpf_program__is_tracepoint(prog) != true)
- continue;
-
- tp_links[tp_cnt] = bpf_program__attach(prog);
- if (libbpf_get_error(tp_links[tp_cnt])) {
- tp_links[tp_cnt] = NULL;
- return -EINVAL;
+ if (mprog_filename) {
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int ret;
+
+ if (!mprog_name) {
+ fprintf(stderr, "BPF program not specified for file %s\n",
+ mprog_filename);
+ goto end;
+ }
+ if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) {
+ fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n",
+ redir_interface ? "device" : "map", redir_interface ? "map" : "device");
+ goto end;
}
- tp_cnt++;
- }
-
- return 0;
-}
-
-static int init_map_fds(struct bpf_object *obj)
-{
- enum map_type type;
-
- for (type = 0; type < NUM_MAP; type++) {
- map_fds[type] =
- bpf_object__find_map_fd_by_name(obj,
- map_type_strings[type]);
-
- if (map_fds[type] < 0)
- return -ENOENT;
- }
-
- return 0;
-}
-static int load_cpumap_prog(char *file_name, char *prog_name,
- char *redir_interface, char *redir_map)
-{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- .expected_attach_type = BPF_XDP_CPUMAP,
- .file = file_name,
- };
- struct bpf_program *prog;
- struct bpf_object *obj;
- int fd;
+ /* Custom BPF program */
+ obj = bpf_object__open_file(mprog_filename, NULL);
+ if (!obj) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n",
+ strerror(errno));
+ return ret;
+ }
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &fd))
- return -1;
+ ret = bpf_object__load(obj);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_object__load: %s\n",
+ strerror(errno));
+ return ret;
+ }
- if (fd < 0) {
- fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
- strerror(errno));
- return fd;
- }
+ if (redir_map) {
+ int err, redir_map_fd, ifindex_out, key = 0;
- if (redir_interface && redir_map) {
- int err, map_fd, ifindex_out, key = 0;
+ redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
+ if (redir_map_fd < 0) {
+ fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n",
+ strerror(errno));
+ return redir_map_fd;
+ }
- map_fd = bpf_object__find_map_fd_by_name(obj, redir_map);
- if (map_fd < 0)
- return map_fd;
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ ifindex_out = strtoul(redir_interface, NULL, 0);
+ if (!ifindex_out) {
+ fprintf(stderr, "Bad interface name or index\n");
+ return -EINVAL;
+ }
- ifindex_out = if_nametoindex(redir_interface);
- if (!ifindex_out)
- return -1;
+ err = bpf_map_update_elem(redir_map_fd, &key, &ifindex_out, 0);
+ if (err < 0)
+ return err;
+ }
- err = bpf_map_update_elem(map_fd, &key, &ifindex_out, 0);
- if (err < 0)
- return err;
- }
+ prog = bpf_object__find_program_by_name(obj, mprog_name);
+ if (!prog) {
+ ret = -errno;
+ fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n",
+ strerror(errno));
+ return ret;
+ }
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (!prog) {
- fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- return EXIT_FAIL;
+ return bpf_program__fd(prog);
+ } else {
+ if (mprog_name) {
+ if (redir_interface || redir_map) {
+ fprintf(stderr, "Need to specify --mprog-filename/-f\n");
+ goto end;
+ }
+ if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) {
+ /* Use built-in pass/drop programs */
+ return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass)
+ : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop);
+ } else {
+ fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n",
+ mprog_name);
+ goto end;
+ }
+ } else {
+ if (redir_map) {
+ fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and"
+ " --redirect-device with --redirect-map\n");
+ goto end;
+ }
+ if (redir_interface) {
+ /* Use built-in devmap redirect */
+ struct bpf_devmap_val val = {};
+ int ifindex_out, err;
+ __u32 key = 0;
+
+ if (!redir_interface)
+ return 0;
+
+ ifindex_out = if_nametoindex(redir_interface);
+ if (!ifindex_out)
+ ifindex_out = strtoul(redir_interface, NULL, 0);
+ if (!ifindex_out) {
+ fprintf(stderr, "Bad interface name or index\n");
+ return -EINVAL;
+ }
+
+ if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) {
+ printf("Get interface %d mac failed\n", ifindex_out);
+ return -EINVAL;
+ }
+
+ val.ifindex = ifindex_out;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
+ if (err < 0)
+ return -errno;
+
+ return bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap);
+ }
+ }
}
- return bpf_program__fd(prog);
+ /* Disabled */
+ return 0;
+end:
+ fprintf(stderr, "Invalid options for CPUMAP BPF program\n");
+ return -EINVAL;
}
int main(int argc, char **argv)
{
- char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs";
- char *mprog_filename = "xdp_redirect_kern.o";
- char *redir_interface = NULL, *redir_map = NULL;
- char *mprog_name = "xdp_redirect_dummy";
- bool mprog_disable = false;
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
+ const char *redir_interface = NULL, *redir_map = NULL;
+ const char *mprog_filename = NULL, *mprog_name = NULL;
+ struct xdp_redirect_cpu *skel;
+ struct bpf_map_info info = {};
+ char ifname_buf[IF_NAMESIZE];
struct bpf_cpumap_val value;
- bool use_separators = true;
+ __u32 infosz = sizeof(info);
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
bool stress_mode = false;
struct bpf_program *prog;
- struct bpf_object *obj;
- int err = EXIT_FAIL;
- char filename[256];
+ const char *prog_name;
+ bool generic = false;
+ bool force = false;
int added_cpus = 0;
+ bool error = true;
int longindex = 0;
- int interval = 2;
int add_cpu = -1;
- int opt, prog_fd;
- int *cpu, i;
+ int ifindex = -1;
+ int *cpu, i, opt;
+ char *ifname;
__u32 qsize;
+ int n_cpus;
- n_cpus = get_nprocs_conf();
+ n_cpus = libbpf_num_possible_cpus();
/* Notice: Choosing the queue size is very important when CPU is
* configured with power-saving states.
@@ -810,73 +349,87 @@ int main(int argc, char **argv)
*/
qsize = 2048;
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return err;
-
- if (prog_fd < 0) {
- fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n",
+ skel = xdp_redirect_cpu__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n",
strerror(errno));
- return err;
+ ret = EXIT_FAIL_BPF;
+ goto end;
+ }
+
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (init_tracepoints(obj) < 0) {
- fprintf(stderr, "ERR: bpf_program__attach failed\n");
- return err;
+ if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpu_map map: %s",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (init_map_fds(obj) < 0) {
- fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
- return err;
+ if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
+ fprintf(stderr, "Failed to set max entries for cpus_available map: %s",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- mark_cpus_unavailable();
- cpu = malloc(n_cpus * sizeof(int));
+ cpu = calloc(n_cpus, sizeof(int));
if (!cpu) {
- fprintf(stderr, "failed to allocate cpu array\n");
- return err;
+ fprintf(stderr, "Failed to allocate cpu array\n");
+ goto end_destroy;
}
- memset(cpu, 0, n_cpus * sizeof(int));
- /* Parse commands line args */
- while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
+ prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs;
+ while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh",
long_options, &longindex)) != -1) {
switch (opt) {
case 'd':
if (strlen(optarg) >= IF_NAMESIZE) {
- fprintf(stderr, "ERR: --dev name too long\n");
- goto error;
+ fprintf(stderr, "-d/--dev name too long\n");
+ goto end_cpu;
}
ifname = (char *)&ifname_buf;
- strncpy(ifname, optarg, IF_NAMESIZE);
+ safe_strncpy(ifname, optarg, sizeof(ifname));
ifindex = if_nametoindex(ifname);
- if (ifindex == 0) {
- fprintf(stderr,
- "ERR: --dev name unknown err(%d):%s\n",
+ if (!ifindex)
+ ifindex = strtoul(optarg, NULL, 0);
+ if (!ifindex) {
+ fprintf(stderr, "Bad interface index or name (%d): %s\n",
errno, strerror(errno));
- goto error;
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
break;
case 's':
- interval = atoi(optarg);
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
break;
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
+ generic = true;
break;
case 'x':
stress_mode = true;
break;
- case 'z':
- use_separators = false;
- break;
case 'p':
/* Selecting eBPF prog to load */
prog_name = optarg;
- break;
- case 'n':
- mprog_disable = true;
+ prog = bpf_object__find_program_by_name(skel->obj,
+ prog_name);
+ if (!prog) {
+ fprintf(stderr,
+ "Failed to find program %s specified by"
+ " option -p/--progname\n",
+ prog_name);
+ print_avail_progs(skel->obj);
+ goto end_cpu;
+ }
break;
case 'f':
mprog_filename = optarg;
@@ -886,6 +439,7 @@ int main(int argc, char **argv)
break;
case 'r':
redir_interface = optarg;
+ mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
break;
case 'm':
redir_map = optarg;
@@ -895,93 +449,115 @@ int main(int argc, char **argv)
add_cpu = strtoul(optarg, NULL, 0);
if (add_cpu >= n_cpus) {
fprintf(stderr,
- "--cpu nr too large for cpumap err(%d):%s\n",
+ "--cpu nr too large for cpumap err (%d):%s\n",
errno, strerror(errno));
- goto error;
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
cpu[added_cpus++] = add_cpu;
break;
case 'q':
- qsize = atoi(optarg);
+ qsize = strtoul(optarg, NULL, 0);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
+ break;
+ case 'v':
+ sample_switch_mode();
break;
case 'h':
- error:
+ error = false;
default:
- free(cpu);
- usage(argv, obj);
- return EXIT_FAIL_OPTION;
+ usage(argv, long_options, __doc__, mask, error, skel->obj);
+ goto end_cpu;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- /* Required option */
+ ret = EXIT_FAIL_OPTION;
if (ifindex == -1) {
- fprintf(stderr, "ERR: required option --dev missing\n");
- usage(argv, obj);
- err = EXIT_FAIL_OPTION;
- goto out;
+ fprintf(stderr, "Required option --dev missing\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- /* Required option */
+
if (add_cpu == -1) {
- fprintf(stderr, "ERR: required option --cpu missing\n");
- fprintf(stderr, " Specify multiple --cpu option to add more\n");
- usage(argv, obj);
- err = EXIT_FAIL_OPTION;
- goto out;
+ fprintf(stderr, "Required option --cpu missing\n"
+ "Specify multiple --cpu option to add more\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
}
- value.bpf_prog.fd = 0;
- if (!mprog_disable)
- value.bpf_prog.fd = load_cpumap_prog(mprog_filename, mprog_name,
- redir_interface, redir_map);
- if (value.bpf_prog.fd < 0) {
- err = value.bpf_prog.fd;
- goto out;
+ skel->rodata->from_match[0] = ifindex;
+ if (redir_interface)
+ skel->rodata->to_match[0] = if_nametoindex(redir_interface);
+
+ ret = xdp_redirect_cpu__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n",
+ strerror(errno));
+ goto end_cpu;
}
- value.qsize = qsize;
- for (i = 0; i < added_cpus; i++)
- create_cpu_entry(cpu[i], &value, i, true);
+ ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
+ if (ret < 0) {
+ fprintf(stderr, "Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
+ strerror(errno));
+ goto end_cpu;
+ }
- /* Remove XDP program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ skel->bss->cpumap_map_id = info.id;
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (!prog) {
- fprintf(stderr, "bpf_object__find_program_by_title failed\n");
- goto out;
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ avail_fd = bpf_map__fd(skel->maps.cpus_available);
+ count_fd = bpf_map__fd(skel->maps.cpus_count);
+
+ ret = mark_cpus_unavailable();
+ if (ret < 0) {
+ fprintf(stderr, "Unable to mark CPUs as unavailable\n");
+ goto end_cpu;
}
- prog_fd = bpf_program__fd(prog);
- if (prog_fd < 0) {
- fprintf(stderr, "bpf_program__fd failed\n");
- goto out;
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_cpu;
}
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
- fprintf(stderr, "link set xdp fd failed\n");
- err = EXIT_FAIL_XDP;
- goto out;
+ value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map,
+ mprog_filename, mprog_name);
+ if (value.bpf_prog.fd < 0) {
+ fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n",
+ strerror(-value.bpf_prog.fd));
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ ret = EXIT_FAIL_BPF;
+ goto end_cpu;
}
+ value.qsize = qsize;
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (err) {
- printf("can't get prog info - %s\n", strerror(errno));
- goto out;
+ for (i = 0; i < added_cpus; i++) {
+ if (create_cpu_entry(cpu[i], &value, i, true) < 0) {
+ fprintf(stderr, "Cannot proceed, exiting\n");
+ usage(argv, long_options, __doc__, mask, true, skel->obj);
+ goto end_cpu;
+ }
}
- prog_id = info.id;
- stats_poll(interval, use_separators, prog_name, mprog_name,
- &value, stress_mode);
+ ret = EXIT_FAIL_XDP;
+ if (sample_install_xdp(prog, ifindex, generic, force) < 0)
+ goto end_cpu;
- err = EXIT_OK;
-out:
+ ret = sample_run(interval, stress_mode ? stress_cpumap : NULL, &value);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_cpu;
+ }
+ ret = EXIT_OK;
+end_cpu:
free(cpu);
- return err;
+end_destroy:
+ xdp_redirect_cpu__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c
deleted file mode 100644
index d26ec3aa215e..000000000000
--- a/samples/bpf/xdp_redirect_kern.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, int);
- __type(value, int);
- __uint(max_entries, 1);
-} tx_port SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
-
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
-
-SEC("xdp_redirect")
-int xdp_redirect_prog(struct xdp_md *ctx)
-{
- void *data_end = (void *)(long)ctx->data_end;
- void *data = (void *)(long)ctx->data;
- struct ethhdr *eth = data;
- int rc = XDP_DROP;
- int *ifindex, port = 0;
- long *value;
- u32 key = 0;
- u64 nh_off;
-
- nh_off = sizeof(*eth);
- if (data + nh_off > data_end)
- return rc;
-
- ifindex = bpf_map_lookup_elem(&tx_port, &port);
- if (!ifindex)
- return rc;
-
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
-
- swap_src_dst_mac(data);
- return bpf_redirect(*ifindex, 0);
-}
-
-/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
-int xdp_redirect_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map.bpf.c
index a92b8e567bdd..59efd656e1b2 100644
--- a/samples/bpf/xdp_redirect_map_kern.c
+++ b/samples/bpf/xdp_redirect_map.bpf.c
@@ -10,14 +10,10 @@
* General Public License for more details.
*/
#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
/* The 2nd xdp prog on egress does not support skb mode, so we define two
* maps, tx_port_general and tx_port_native.
@@ -26,114 +22,71 @@ struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(int));
- __uint(max_entries, 100);
+ __uint(max_entries, 1);
} tx_port_general SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
__uint(key_size, sizeof(int));
__uint(value_size, sizeof(struct bpf_devmap_val));
- __uint(max_entries, 100);
-} tx_port_native SEC(".maps");
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback. Redirect TX errors can be caught via a tracepoint.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-/* map to store egress interface mac address */
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __type(key, u32);
- __type(value, __be64);
__uint(max_entries, 1);
-} tx_mac SEC(".maps");
-
-static void swap_src_dst_mac(void *data)
-{
- unsigned short *p = data;
- unsigned short dst[3];
+} tx_port_native SEC(".maps");
- dst[0] = p[0];
- dst[1] = p[1];
- dst[2] = p[2];
- p[0] = p[3];
- p[1] = p[4];
- p[2] = p[5];
- p[3] = dst[0];
- p[4] = dst[1];
- p[5] = dst[2];
-}
+/* store egress interface mac address */
+const volatile char tx_mac_addr[ETH_ALEN];
static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
+ u32 key = bpf_get_smp_processor_id();
struct ethhdr *eth = data;
- int rc = XDP_DROP;
- long *value;
- u32 key = 0;
+ struct datarec *rec;
u64 nh_off;
- int vport;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
- return rc;
-
- /* constant virtual port */
- vport = 0;
-
- /* count packet in global counter */
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
+ return XDP_DROP;
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
swap_src_dst_mac(data);
-
- /* send packet out physical port */
- return bpf_redirect_map(redirect_map, vport, 0);
+ return bpf_redirect_map(redirect_map, 0, 0);
}
-SEC("xdp_redirect_general")
+SEC("xdp")
int xdp_redirect_map_general(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &tx_port_general);
}
-SEC("xdp_redirect_native")
+SEC("xdp")
int xdp_redirect_map_native(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &tx_port_native);
}
-SEC("xdp_devmap/map_prog")
+SEC("xdp_devmap/egress")
int xdp_redirect_map_egress(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
- __be64 *mac;
- u32 key = 0;
u64 nh_off;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
- mac = bpf_map_lookup_elem(&tx_mac, &key);
- if (mac)
- __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
return XDP_PASS;
}
/* Redirect require an XDP bpf_prog loaded on the TX device */
-SEC("xdp_redirect_dummy")
+SEC("xdp")
int xdp_redirect_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/samples/bpf/xdp_redirect_map_multi_kern.c b/samples/bpf/xdp_redirect_map_multi.bpf.c
index 71aa23d1cb2b..8f59d430cb64 100644
--- a/samples/bpf/xdp_redirect_map_multi_kern.c
+++ b/samples/bpf/xdp_redirect_map_multi.bpf.c
@@ -1,11 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#define KBUILD_MODNAME "foo"
-#include <uapi/linux/bpf.h>
-#include <linux/in.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <bpf/bpf_helpers.h>
+
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+
+enum {
+ BPF_F_BROADCAST = (1ULL << 3),
+ BPF_F_EXCLUDE_INGRESS = (1ULL << 4),
+};
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
@@ -21,50 +24,41 @@ struct {
__uint(max_entries, 32);
} forward_map_native SEC(".maps");
+/* map to store egress interfaces mac addresses */
struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __type(key, u32);
- __type(value, long);
- __uint(max_entries, 1);
-} rxcnt SEC(".maps");
-
-/* map to store egress interfaces mac addresses, set the
- * max_entries to 1 and extend it in user sapce prog.
- */
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(type, BPF_MAP_TYPE_HASH);
__type(key, u32);
__type(value, __be64);
- __uint(max_entries, 1);
+ __uint(max_entries, 32);
} mac_map SEC(".maps");
static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
{
- long *value;
- u32 key = 0;
+ u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
- /* count packet in global counter */
- value = bpf_map_lookup_elem(&rxcnt, &key);
- if (value)
- *value += 1;
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
- return bpf_redirect_map(forward_map, key,
+ return bpf_redirect_map(forward_map, 0,
BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}
-SEC("xdp_redirect_general")
+SEC("xdp")
int xdp_redirect_map_general(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &forward_map_general);
}
-SEC("xdp_redirect_native")
+SEC("xdp")
int xdp_redirect_map_native(struct xdp_md *ctx)
{
return xdp_redirect_map(ctx, &forward_map_native);
}
-SEC("xdp_devmap/map_prog")
+SEC("xdp_devmap/egress")
int xdp_devmap_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c
index 84cdbbed20b7..315314716121 100644
--- a/samples/bpf/xdp_redirect_map_multi_user.c
+++ b/samples/bpf/xdp_redirect_map_multi_user.c
@@ -1,7 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
+static const char *__doc__ =
+"XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n"
+"Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
+#include <getopt.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
@@ -15,106 +20,54 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
-
-#include "bpf_util.h"
+#include <linux/if_ether.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map_multi.skel.h"
#define MAX_IFACE_NUM 32
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static int ifaces[MAX_IFACE_NUM] = {};
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i;
-
- for (i = 0; ifaces[i] > 0; i++) {
- if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id)
- bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
- }
-
- exit(0);
-}
-
-static void poll_stats(int interval)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("Forwarding %10llu pkt/s\n", sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
-
-static int get_mac_addr(unsigned int ifindex, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING;
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
+DEFINE_SAMPLE_INIT(xdp_redirect_map_multi);
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "load-egress", no_argument, NULL, 'X' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+};
-err_out:
- close(fd);
- return ret;
-}
-
-static int update_mac_map(struct bpf_object *obj)
+static int update_mac_map(struct bpf_map *map)
{
- int i, ret = -1, mac_map_fd;
+ int mac_map_fd = bpf_map__fd(map);
unsigned char mac_addr[6];
unsigned int ifindex;
-
- mac_map_fd = bpf_object__find_map_fd_by_name(obj, "mac_map");
- if (mac_map_fd < 0) {
- printf("find mac map fd failed\n");
- return ret;
- }
+ int i, ret = -1;
for (i = 0; ifaces[i] > 0; i++) {
ifindex = ifaces[i];
ret = get_mac_addr(ifindex, mac_addr);
if (ret < 0) {
- printf("get interface %d mac failed\n", ifindex);
+ fprintf(stderr, "get interface %d mac failed\n",
+ ifindex);
return ret;
}
ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem mac_map_fd");
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update mac address for ifindex %d\n",
+ ifindex);
return ret;
}
}
@@ -122,181 +75,159 @@ static int update_mac_map(struct bpf_object *obj)
return 0;
}
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
-
int main(int argc, char **argv)
{
- int i, ret, opt, forward_map_fd, max_ifindex = 0;
- struct bpf_program *ingress_prog, *egress_prog;
- int ingress_prog_fd, egress_prog_fd = 0;
- struct bpf_devmap_val devmap_val;
- bool attach_egress_prog = false;
+ struct bpf_devmap_val devmap_val = {};
+ struct xdp_redirect_map_multi *skel;
+ struct bpf_program *ingress_prog;
+ bool xdp_devmap_attached = false;
+ struct bpf_map *forward_map;
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
char ifname[IF_NAMESIZE];
- struct bpf_map *mac_map;
- struct bpf_object *obj;
unsigned int ifindex;
- char filename[256];
-
- while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+ bool generic = false;
+ bool force = false;
+ bool tried = false;
+ bool error = true;
+ int i, opt;
+
+ while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ /* devmap_xmit tracepoint not available */
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
break;
case 'X':
- attach_egress_prog = true;
+ xdp_devmap_attached = true;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (attach_egress_prog) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
- return 1;
+ skel = xdp_redirect_map_multi__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- printf("Get interfaces");
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = EXIT_FAIL_OPTION;
for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
ifaces[i] = if_nametoindex(argv[optind + i]);
if (!ifaces[i])
ifaces[i] = strtoul(argv[optind + i], NULL, 0);
if (!if_indextoname(ifaces[i], ifname)) {
- perror("Invalid interface name or i");
- return 1;
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end_destroy;
}
- /* Find the largest index number */
- if (ifaces[i] > max_ifindex)
- max_ifindex = ifaces[i];
-
- printf(" %d", ifaces[i]);
- }
- printf("\n");
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
- obj = bpf_object__open(filename);
- if (libbpf_get_error(obj)) {
- printf("ERROR: opening BPF object file failed\n");
- obj = NULL;
- goto err_out;
+ skel->rodata->from_match[i] = ifaces[i];
+ skel->rodata->to_match[i] = ifaces[i];
}
- /* Reset the map size to max ifindex + 1 */
- if (attach_egress_prog) {
- mac_map = bpf_object__find_map_by_name(obj, "mac_map");
- ret = bpf_map__resize(mac_map, max_ifindex + 1);
- if (ret < 0) {
- printf("ERROR: reset mac map size failed\n");
- goto err_out;
- }
+ ret = xdp_redirect_map_multi__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- /* load BPF program */
- if (bpf_object__load(obj)) {
- printf("ERROR: loading BPF object file failed\n");
- goto err_out;
- }
-
- if (xdp_flags & XDP_FLAGS_SKB_MODE) {
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
- forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_general");
- } else {
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
- forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_native");
- }
- if (!ingress_prog || forward_map_fd < 0) {
- printf("finding ingress_prog/forward_map in obj file failed\n");
- goto err_out;
- }
-
- ingress_prog_fd = bpf_program__fd(ingress_prog);
- if (ingress_prog_fd < 0) {
- printf("find ingress_prog fd failed\n");
- goto err_out;
- }
-
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- goto err_out;
- }
-
- if (attach_egress_prog) {
+ if (xdp_devmap_attached) {
/* Update mac_map with all egress interfaces' mac addr */
- if (update_mac_map(obj) < 0) {
- printf("Error: update mac map failed");
- goto err_out;
+ if (update_mac_map(skel->maps.mac_map) < 0) {
+ fprintf(stderr, "Updating mac address failed\n");
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
+ }
- /* Find egress prog fd */
- egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
- if (!egress_prog) {
- printf("finding egress_prog in obj file failed\n");
- goto err_out;
- }
- egress_prog_fd = bpf_program__fd(egress_prog);
- if (egress_prog_fd < 0) {
- printf("find egress_prog fd failed\n");
- goto err_out;
- }
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- /* Remove attached program when program is interrupted or killed */
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ ingress_prog = skel->progs.xdp_redirect_map_native;
+ forward_map = skel->maps.forward_map_native;
- /* Init forward multicast groups */
for (i = 0; ifaces[i] > 0; i++) {
ifindex = ifaces[i];
+ ret = EXIT_FAIL_XDP;
+restart:
/* bind prog_fd to each interface */
- ret = bpf_set_link_xdp_fd(ifindex, ingress_prog_fd, xdp_flags);
- if (ret) {
- printf("Set xdp fd failed on %d\n", ifindex);
- goto err_out;
+ if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) {
+ if (generic && !tried) {
+ fprintf(stderr,
+ "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+ ingress_prog = skel->progs.xdp_redirect_map_general;
+ forward_map = skel->maps.forward_map_general;
+ tried = true;
+ goto restart;
+ }
+ goto end_destroy;
}
/* Add all the interfaces to forward group and attach
- * egress devmap programe if exist
+ * egress devmap program if exist
*/
devmap_val.ifindex = ifindex;
- devmap_val.bpf_prog.fd = egress_prog_fd;
- ret = bpf_map_update_elem(forward_map_fd, &ifindex, &devmap_val, 0);
- if (ret) {
- perror("bpf_map_update_elem forward_map");
- goto err_out;
+ if (xdp_devmap_attached)
+ devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog);
+ ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap value: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
}
- poll_stats(2);
-
- return 0;
-
-err_out:
- return 1;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect_map_multi__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 0e8192688dfc..b6e4fc849577 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -1,6 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
*/
+static const char *__doc__ =
+"XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n"
+"Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
@@ -13,165 +17,83 @@
#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include "bpf_util.h"
+#include <getopt.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect_map.skel.h"
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static bool xdp_devmap_attached;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface IN\n");
- else
- printf("program on iface IN changed, not removing\n");
-
- if (ifindex_out_xdp_dummy_attached) {
- curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
- xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (dummy_prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface OUT\n");
- else
- printf("program on iface OUT changed, not removing\n");
- }
- exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
-
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("ifindex %i: %10llu pkt/s\n",
- ifindex, sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
-
-static int get_mac_addr(unsigned int ifindex_out, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex_out, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
-
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-err_out:
- close(fd);
- return ret;
-}
+DEFINE_SAMPLE_INIT(xdp_redirect_map);
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
+static const struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "skb-mode", no_argument, NULL, 'S' },
+ { "force", no_argument, NULL, 'F' },
+ { "load-egress", no_argument, NULL, 'X' },
+ { "stats", no_argument, NULL, 's' },
+ { "interval", required_argument, NULL, 'i' },
+ { "verbose", no_argument, NULL, 'v' },
+ {}
+};
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_UNSPEC,
- };
- struct bpf_program *prog, *dummy_prog, *devmap_prog;
- int prog_fd, dummy_prog_fd, devmap_prog_fd = 0;
- int tx_port_map_fd, tx_mac_map_fd;
- struct bpf_devmap_val devmap_val;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "FSNX";
- struct bpf_object *obj;
- int ret, opt, key = 0;
- char filename[256];
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
+ struct bpf_devmap_val devmap_val = {};
+ bool xdp_devmap_attached = false;
+ struct xdp_redirect_map *skel;
+ char str[2 * IF_NAMESIZE + 1];
+ char ifname_out[IF_NAMESIZE];
+ struct bpf_map *tx_port_map;
+ char ifname_in[IF_NAMESIZE];
+ int ifindex_in, ifindex_out;
+ unsigned long interval = 2;
+ int ret = EXIT_FAIL_OPTION;
+ struct bpf_program *prog;
+ bool generic = false;
+ bool force = false;
+ bool tried = false;
+ bool error = true;
+ int opt, key = 0;
+
+ while ((opt = getopt_long(argc, argv, "hSFXi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ /* devmap_xmit tracepoint not available */
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
break;
case 'X':
xdp_devmap_attached = true;
break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
+ break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_MAP_CNT;
+ break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (xdp_devmap_attached) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- return 1;
- }
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
ifindex_in = if_nametoindex(argv[optind]);
@@ -182,107 +104,116 @@ int main(int argc, char **argv)
if (!ifindex_out)
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
- printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return 1;
-
- if (xdp_flags & XDP_FLAGS_SKB_MODE) {
- prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_general");
- } else {
- prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port_native");
- }
- dummy_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_dummy_prog");
- if (!prog || dummy_prog < 0 || tx_port_map_fd < 0) {
- printf("finding prog/dummy_prog/tx_port_map in obj file failed\n");
- goto out;
- }
- prog_fd = bpf_program__fd(prog);
- dummy_prog_fd = bpf_program__fd(dummy_prog);
- if (prog_fd < 0 || dummy_prog_fd < 0 || tx_port_map_fd < 0) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
- }
-
- tx_mac_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_mac");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (tx_mac_map_fd < 0 || rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ if (!ifindex_in || !ifindex_out) {
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
- if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
- printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
- return 1;
+ skel = xdp_redirect_map__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- prog_id = info.id;
-
- /* Loading dummy XDP prog on out-device */
- if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
- (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
- printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
- ifindex_out_xdp_dummy_attached = false;
- }
-
- memset(&info, 0, sizeof(info));
- ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
- }
- dummy_prog_id = info.id;
/* Load 2nd xdp prog on egress. */
if (xdp_devmap_attached) {
- unsigned char mac_addr[6];
-
- devmap_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_egress");
- if (!devmap_prog) {
- printf("finding devmap_prog in obj file failed\n");
- goto out;
- }
- devmap_prog_fd = bpf_program__fd(devmap_prog);
- if (devmap_prog_fd < 0) {
- printf("finding devmap_prog fd failed\n");
- goto out;
- }
-
- if (get_mac_addr(ifindex_out, mac_addr) < 0) {
- printf("get interface %d mac failed\n", ifindex_out);
- goto out;
+ ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to get interface %d mac address: %s\n",
+ ifindex_out, strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
+ }
- ret = bpf_map_update_elem(tx_mac_map_fd, &key, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem tx_mac_map_fd");
- goto out;
+ skel->rodata->from_match[0] = ifindex_in;
+ skel->rodata->to_match[0] = ifindex_out;
+
+ ret = xdp_redirect_map__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+
+ prog = skel->progs.xdp_redirect_map_native;
+ tx_port_map = skel->maps.tx_port_native;
+restart:
+ if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) {
+ /* First try with struct bpf_devmap_val as value for generic
+ * mode, then fallback to sizeof(int) for older kernels.
+ */
+ fprintf(stderr,
+ "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n");
+ if (generic && !tried) {
+ prog = skel->progs.xdp_redirect_map_general;
+ tx_port_map = skel->maps.tx_port_general;
+ tried = true;
+ goto restart;
}
+ ret = EXIT_FAIL_XDP;
+ goto end_destroy;
}
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ /* Loading dummy XDP prog on out-device */
+ sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force);
devmap_val.ifindex = ifindex_out;
- devmap_val.bpf_prog.fd = devmap_prog_fd;
- ret = bpf_map_update_elem(tx_port_map_fd, &key, &devmap_val, 0);
- if (ret) {
- perror("bpf_update_elem");
- goto out;
- }
-
- poll_stats(2, ifindex_out);
-
-out:
- return 0;
+ if (xdp_devmap_attached)
+ devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress);
+ ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to update devmap value: %s\n",
+ strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
+ }
+
+ ret = EXIT_FAIL;
+ if (!if_indextoname(ifindex_in, ifname_in)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+ strerror(errno));
+ goto end_destroy;
+ }
+
+ if (!if_indextoname(ifindex_out, ifname_out)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+ strerror(errno));
+ goto end_destroy;
+ }
+
+ safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+ printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+ ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+ snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
+
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
+ }
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect_map__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 93854e135134..7af5b07a7523 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -1,6 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
*/
+static const char *__doc__ =
+"XDP redirect tool, using bpf_redirect helper\n"
+"Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n";
+
#include <linux/bpf.h>
#include <linux/if_link.h>
#include <assert.h>
@@ -13,126 +17,73 @@
#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
+#include <getopt.h>
#include <sys/resource.h>
-
-#include "bpf_util.h"
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+#include "xdp_redirect.skel.h"
-static int ifindex_in;
-static int ifindex_out;
-static bool ifindex_out_xdp_dummy_attached = true;
-static __u32 prog_id;
-static __u32 dummy_prog_id;
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int rxcnt_map_fd;
-
-static void int_exit(int sig)
-{
- __u32 curr_prog_id = 0;
-
- if (bpf_get_link_xdp_id(ifindex_in, &curr_prog_id, xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_in, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface IN\n");
- else
- printf("program on iface IN changed, not removing\n");
-
- if (ifindex_out_xdp_dummy_attached) {
- curr_prog_id = 0;
- if (bpf_get_link_xdp_id(ifindex_out, &curr_prog_id,
- xdp_flags)) {
- printf("bpf_get_link_xdp_id failed\n");
- exit(1);
- }
- if (dummy_prog_id == curr_prog_id)
- bpf_set_link_xdp_fd(ifindex_out, -1, xdp_flags);
- else if (!curr_prog_id)
- printf("couldn't find a prog id on iface OUT\n");
- else
- printf("program on iface OUT changed, not removing\n");
- }
- exit(0);
-}
-
-static void poll_stats(int interval, int ifindex)
-{
- unsigned int nr_cpus = bpf_num_possible_cpus();
- __u64 values[nr_cpus], prev[nr_cpus];
-
- memset(prev, 0, sizeof(prev));
-
- while (1) {
- __u64 sum = 0;
- __u32 key = 0;
- int i;
-
- sleep(interval);
- assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
- for (i = 0; i < nr_cpus; i++)
- sum += (values[i] - prev[i]);
- if (sum)
- printf("ifindex %i: %10llu pkt/s\n",
- ifindex, sum / interval);
- memcpy(prev, values, sizeof(values));
- }
-}
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n",
- prog);
-}
+DEFINE_SAMPLE_INIT(xdp_redirect);
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"skb-mode", no_argument, NULL, 'S' },
+ {"force", no_argument, NULL, 'F' },
+ {"stats", no_argument, NULL, 's' },
+ {"interval", required_argument, NULL, 'i' },
+ {"verbose", no_argument, NULL, 'v' },
+ {}
+};
int main(int argc, char **argv)
{
- struct bpf_prog_load_attr prog_load_attr = {
- .prog_type = BPF_PROG_TYPE_XDP,
- };
- struct bpf_program *prog, *dummy_prog;
- int prog_fd, tx_port_map_fd, opt;
- struct bpf_prog_info info = {};
- __u32 info_len = sizeof(info);
- const char *optstr = "FSN";
- struct bpf_object *obj;
- char filename[256];
- int dummy_prog_fd;
- int ret, key = 0;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
+ int ifindex_in, ifindex_out, opt;
+ char str[2 * IF_NAMESIZE + 1];
+ char ifname_out[IF_NAMESIZE];
+ char ifname_in[IF_NAMESIZE];
+ int ret = EXIT_FAIL_OPTION;
+ unsigned long interval = 2;
+ struct xdp_redirect *skel;
+ bool generic = false;
+ bool force = false;
+ bool error = true;
+
+ while ((opt = getopt_long(argc, argv, "hSFi:vs",
+ long_options, NULL)) != -1) {
switch (opt) {
case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
+ generic = true;
+ mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI);
break;
case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ force = true;
+ break;
+ case 'i':
+ interval = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ sample_switch_mode();
+ break;
+ case 's':
+ mask |= SAMPLE_REDIRECT_CNT;
break;
+ case 'h':
+ error = false;
default:
- usage(basename(argv[0]));
- return 1;
+ sample_usage(argv, long_options, __doc__, mask, error);
+ return ret;
}
}
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
- xdp_flags |= XDP_FLAGS_DRV_MODE;
-
- if (optind + 2 != argc) {
- printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
- return 1;
+ if (argc <= optind + 1) {
+ sample_usage(argv, long_options, __doc__, mask, true);
+ return ret;
}
ifindex_in = if_nametoindex(argv[optind]);
@@ -143,75 +94,80 @@ int main(int argc, char **argv)
if (!ifindex_out)
ifindex_out = strtoul(argv[optind + 1], NULL, 0);
- printf("input: %d output: %d\n", ifindex_in, ifindex_out);
-
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
- prog_load_attr.file = filename;
-
- if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
- return 1;
-
- prog = bpf_program__next(NULL, obj);
- dummy_prog = bpf_program__next(prog, obj);
- if (!prog || !dummy_prog) {
- printf("finding a prog in obj file failed\n");
- return 1;
+ if (!ifindex_in || !ifindex_out) {
+ fprintf(stderr, "Bad interface index or name\n");
+ sample_usage(argv, long_options, __doc__, mask, true);
+ goto end;
}
- /* bpf_prog_load_xattr gives us the pointer to first prog's fd,
- * so we're missing only the fd for dummy prog
- */
- dummy_prog_fd = bpf_program__fd(dummy_prog);
- if (prog_fd < 0 || dummy_prog_fd < 0) {
- printf("bpf_prog_load_xattr: %s\n", strerror(errno));
- return 1;
+
+ skel = xdp_redirect__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end;
}
- tx_port_map_fd = bpf_object__find_map_fd_by_name(obj, "tx_port");
- rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
- if (tx_port_map_fd < 0 || rxcnt_map_fd < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- return 1;
+ ret = sample_init_pre_load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- if (bpf_set_link_xdp_fd(ifindex_in, prog_fd, xdp_flags) < 0) {
- printf("ERROR: link set xdp fd failed on %d\n", ifindex_in);
- return 1;
+ skel->rodata->from_match[0] = ifindex_in;
+ skel->rodata->to_match[0] = ifindex_out;
+ skel->rodata->ifindex_out = ifindex_out;
+
+ ret = xdp_redirect__load(skel);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno));
+ ret = EXIT_FAIL_BPF;
+ goto end_destroy;
}
- ret = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ ret = sample_init(skel, mask);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
- prog_id = info.id;
+
+ ret = EXIT_FAIL_XDP;
+ if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in,
+ generic, force) < 0)
+ goto end_destroy;
/* Loading dummy XDP prog on out-device */
- if (bpf_set_link_xdp_fd(ifindex_out, dummy_prog_fd,
- (xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST)) < 0) {
- printf("WARN: link set xdp fd failed on %d\n", ifindex_out);
- ifindex_out_xdp_dummy_attached = false;
+ sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out,
+ generic, force);
+
+ ret = EXIT_FAIL;
+ if (!if_indextoname(ifindex_in, ifname_in)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in,
+ strerror(errno));
+ goto end_destroy;
}
- memset(&info, 0, sizeof(info));
- ret = bpf_obj_get_info_by_fd(dummy_prog_fd, &info, &info_len);
- if (ret) {
- printf("can't get prog info - %s\n", strerror(errno));
- return ret;
+ if (!if_indextoname(ifindex_out, ifname_out)) {
+ fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out,
+ strerror(errno));
+ goto end_destroy;
}
- dummy_prog_id = info.id;
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
+ safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str));
+ printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+ ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out));
+ snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out);
- /* bpf redirect port */
- ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
- if (ret) {
- perror("bpf_update_elem");
- goto out;
+ ret = sample_run(interval, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret));
+ ret = EXIT_FAIL;
+ goto end_destroy;
}
-
- poll_stats(2, ifindex_out);
-
-out:
- return ret;
+ ret = EXIT_OK;
+end_destroy:
+ xdp_redirect__destroy(skel);
+end:
+ sample_exit(ret);
}
diff --git a/samples/bpf/xdp_sample.bpf.c b/samples/bpf/xdp_sample.bpf.c
new file mode 100644
index 000000000000..0eb7e1dcae22
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+/* GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */
+#include "xdp_sample.bpf.h"
+
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+array_map rx_cnt SEC(".maps");
+array_map redir_err_cnt SEC(".maps");
+array_map cpumap_enqueue_cnt SEC(".maps");
+array_map cpumap_kthread_cnt SEC(".maps");
+array_map exception_cnt SEC(".maps");
+array_map devmap_xmit_cnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 32 * 32);
+ __type(key, u64);
+ __type(value, struct datarec);
+} devmap_xmit_cnt_multi SEC(".maps");
+
+const volatile int nr_cpus = 0;
+
+/* These can be set before loading so that redundant comparisons can be DCE'd by
+ * the verifier, and only actual matches are tried after loading tp_btf program.
+ * This allows sample to filter tracepoint stats based on net_device.
+ */
+const volatile int from_match[32] = {};
+const volatile int to_match[32] = {};
+
+int cpumap_map_id = 0;
+
+/* Find if b is part of set a, but if a is empty set then evaluate to true */
+#define IN_SET(a, b) \
+ ({ \
+ bool __res = !(a)[0]; \
+ for (int i = 0; i < ARRAY_SIZE(a) && (a)[i]; i++) { \
+ __res = (a)[i] == (b); \
+ if (__res) \
+ break; \
+ } \
+ __res; \
+ })
+
+static __always_inline __u32 xdp_get_err_key(int err)
+{
+ switch (err) {
+ case 0:
+ return 0;
+ case -EINVAL:
+ return 2;
+ case -ENETDOWN:
+ return 3;
+ case -EMSGSIZE:
+ return 4;
+ case -EOPNOTSUPP:
+ return 5;
+ case -ENOSPC:
+ return 6;
+ default:
+ return 1;
+ }
+}
+
+static __always_inline int xdp_redirect_collect_stat(int from, int err)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = XDP_REDIRECT_ERROR;
+ struct datarec *rec;
+ u32 idx;
+
+ if (!IN_SET(from_match, from))
+ return 0;
+
+ key = xdp_get_err_key(err);
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&redir_err_cnt, &idx);
+ if (!rec)
+ return 0;
+ if (key)
+ NO_TEAR_INC(rec->dropped);
+ else
+ NO_TEAR_INC(rec->processed);
+ return 0; /* Indicate event was filtered (no further processing)*/
+ /*
+ * Returning 1 here would allow e.g. a perf-record tracepoint
+ * to see and record these events, but it doesn't work well
+ * in-practice as stopping perf-record also unload this
+ * bpf_prog. Plus, there is additional overhead of doing so.
+ */
+}
+
+SEC("tp_btf/xdp_redirect_err")
+int BPF_PROG(tp_xdp_redirect_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map_err")
+int BPF_PROG(tp_xdp_redirect_map_err, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect")
+int BPF_PROG(tp_xdp_redirect, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_redirect_map")
+int BPF_PROG(tp_xdp_redirect_map, const struct net_device *dev,
+ const struct bpf_prog *xdp, const void *tgt, int err,
+ const struct bpf_map *map, u32 index)
+{
+ return xdp_redirect_collect_stat(dev->ifindex, err);
+}
+
+SEC("tp_btf/xdp_cpumap_enqueue")
+int BPF_PROG(tp_xdp_cpumap_enqueue, int map_id, unsigned int processed,
+ unsigned int drops, int to_cpu)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 idx;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ idx = to_cpu * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (processed > 0)
+ NO_TEAR_INC(rec->issue);
+ /* Inception: It's possible to detect overload situations, via
+ * this tracepoint. This can be used for creating a feedback
+ * loop to XDP, which can take appropriate actions to mitigate
+ * this overload situation.
+ */
+ return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ struct datarec *rec;
+ u32 cpu;
+
+ if (cpumap_map_id && cpumap_map_id != map_id)
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, processed);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_ADD(rec->xdp_pass, xdp_stats->pass);
+ NO_TEAR_ADD(rec->xdp_drop, xdp_stats->drop);
+ NO_TEAR_ADD(rec->xdp_redirect, xdp_stats->redirect);
+ /* Count times kthread yielded CPU via schedule call */
+ if (sched)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(tp_xdp_exception, const struct net_device *dev,
+ const struct bpf_prog *xdp, u32 act)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ u32 key = act, idx;
+
+ if (!IN_SET(from_match, dev->ifindex))
+ return 0;
+ if (!IN_SET(to_match, dev->ifindex))
+ return 0;
+
+ if (key > XDP_REDIRECT)
+ key = XDP_REDIRECT + 1;
+
+ idx = key * nr_cpus + cpu;
+ rec = bpf_map_lookup_elem(&exception_cnt, &idx);
+ if (!rec)
+ return 0;
+ NO_TEAR_INC(rec->dropped);
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u32 cpu;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ cpu = bpf_get_smp_processor_id();
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &cpu);
+ if (!rec)
+ return 0;
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ /* Record bulk events, then userspace can calc average bulk size */
+ NO_TEAR_INC(rec->info);
+ /* Record error cases, where no frame were sent */
+ /* Catch API error of drv ndo_xdp_xmit sent more than count */
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device *from_dev,
+ const struct net_device *to_dev, int sent, int drops, int err)
+{
+ struct datarec empty = {};
+ struct datarec *rec;
+ int idx_in, idx_out;
+ u64 idx;
+
+ idx_in = from_dev->ifindex;
+ idx_out = to_dev->ifindex;
+ idx = idx_in;
+ idx = idx << 32 | idx_out;
+
+ if (!IN_SET(from_match, idx_in))
+ return 0;
+ if (!IN_SET(to_match, idx_out))
+ return 0;
+
+ bpf_map_update_elem(&devmap_xmit_cnt_multi, &idx, &empty, BPF_NOEXIST);
+ rec = bpf_map_lookup_elem(&devmap_xmit_cnt_multi, &idx);
+ if (!rec)
+ return 0;
+
+ NO_TEAR_ADD(rec->processed, sent);
+ NO_TEAR_ADD(rec->dropped, drops);
+ NO_TEAR_INC(rec->info);
+ if (err || drops < 0)
+ NO_TEAR_INC(rec->issue);
+ return 0;
+}
diff --git a/samples/bpf/xdp_sample.bpf.h b/samples/bpf/xdp_sample.bpf.h
new file mode 100644
index 000000000000..25b1dbe9b37b
--- /dev/null
+++ b/samples/bpf/xdp_sample.bpf.h
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _XDP_SAMPLE_BPF_H
+#define _XDP_SAMPLE_BPF_H
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#include "xdp_sample_shared.h"
+
+#define ETH_ALEN 6
+#define ETH_P_802_3_MIN 0x0600
+#define ETH_P_8021Q 0x8100
+#define ETH_P_8021AD 0x88A8
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define ETH_P_ARP 0x0806
+#define IPPROTO_ICMPV6 58
+
+#define EINVAL 22
+#define ENETDOWN 100
+#define EMSGSIZE 90
+#define EOPNOTSUPP 95
+#define ENOSPC 28
+
+typedef struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __type(key, unsigned int);
+ __type(value, struct datarec);
+} array_map;
+
+extern array_map rx_cnt;
+extern const volatile int nr_cpus;
+
+enum {
+ XDP_REDIRECT_SUCCESS = 0,
+ XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline void swap_src_dst_mac(void *data)
+{
+ unsigned short *p = data;
+ unsigned short dst[3];
+
+ dst[0] = p[0];
+ dst[1] = p[1];
+ dst[2] = p[2];
+ p[0] = p[3];
+ p[1] = p[4];
+ p[2] = p[5];
+ p[3] = dst[0];
+ p[4] = dst[1];
+ p[5] = dst[2];
+}
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_ntohs(x) __builtin_bswap16(x)
+#define bpf_htons(x) __builtin_bswap16(x)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+ __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define bpf_ntohs(x) (x)
+#define bpf_htons(x) (x)
+#else
+# error "Endianness detection needs to be set up for your compiler?!"
+#endif
+
+/*
+ * Note: including linux/compiler.h or linux/kernel.h for the macros below
+ * conflicts with vmlinux.h include in BPF files, so we define them here.
+ *
+ * Following functions are taken from kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3 which makes build fail
+ * under gcc 4.4.
+ *
+ * Using extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8 __attribute__((__may_alias__)) __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break;
+ case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+ case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+ case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break;
+ case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+ case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+ case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+ default:
+ asm volatile ("" : : : "memory");
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ asm volatile ("" : : : "memory");
+ }
+}
+
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+/* Add a value using relaxed read and relaxed write. Less expensive than
+ * fetch_add when there is no write concurrency.
+ */
+#define NO_TEAR_ADD(x, val) WRITE_ONCE((x), READ_ONCE(x) + (val))
+#define NO_TEAR_INC(x) NO_TEAR_ADD((x), 1)
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#endif
diff --git a/samples/bpf/xdp_sample_shared.h b/samples/bpf/xdp_sample_shared.h
new file mode 100644
index 000000000000..8a7669a5d563
--- /dev/null
+++ b/samples/bpf/xdp_sample_shared.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _XDP_SAMPLE_SHARED_H
+#define _XDP_SAMPLE_SHARED_H
+
+struct datarec {
+ size_t processed;
+ size_t dropped;
+ size_t issue;
+ union {
+ size_t xdp_pass;
+ size_t info;
+ };
+ size_t xdp_drop;
+ size_t xdp_redirect;
+} __attribute__((aligned(64)));
+
+#endif
diff --git a/samples/bpf/xdp_sample_user.c b/samples/bpf/xdp_sample_user.c
new file mode 100644
index 000000000000..b32d82178199
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.c
@@ -0,0 +1,1673 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/ethtool.h>
+#include <linux/hashtable.h>
+#include <linux/if_link.h>
+#include <linux/jhash.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <locale.h>
+#include <math.h>
+#include <net/if.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/signalfd.h>
+#include <sys/sysinfo.h>
+#include <sys/timerfd.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "bpf_util.h"
+#include "xdp_sample_user.h"
+
+#define __sample_print(fmt, cond, ...) \
+ ({ \
+ if (cond) \
+ printf(fmt, ##__VA_ARGS__); \
+ })
+
+#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__)
+#define print_default(fmt, ...) \
+ __sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__)
+#define __print_err(err, fmt, ...) \
+ ({ \
+ __sample_print(fmt, err > 0 || sample_log_level & LL_DEFAULT, \
+ ##__VA_ARGS__); \
+ sample_err_exp = sample_err_exp ? true : err > 0; \
+ })
+#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__)
+
+#define __COLUMN(x) "%'10" x " %-13s"
+#define FMT_COLUMNf __COLUMN(".0f")
+#define FMT_COLUMNd __COLUMN("d")
+#define FMT_COLUMNl __COLUMN("llu")
+#define RX(rx) rx, "rx/s"
+#define PPS(pps) pps, "pkt/s"
+#define DROP(drop) drop, "drop/s"
+#define ERR(err) err, "error/s"
+#define HITS(hits) hits, "hit/s"
+#define XMIT(xmit) xmit, "xmit/s"
+#define PASS(pass) pass, "pass/s"
+#define REDIR(redir) redir, "redir/s"
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+
+#define XDP_UNKNOWN (XDP_REDIRECT + 1)
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+#define XDP_REDIRECT_ERR_MAX 7
+
+enum map_type {
+ MAP_RX,
+ MAP_REDIRECT_ERR,
+ MAP_CPUMAP_ENQUEUE,
+ MAP_CPUMAP_KTHREAD,
+ MAP_EXCEPTION,
+ MAP_DEVMAP_XMIT,
+ MAP_DEVMAP_XMIT_MULTI,
+ NUM_MAP,
+};
+
+enum log_level {
+ LL_DEFAULT = 1U << 0,
+ LL_SIMPLE = 1U << 1,
+ LL_DEBUG = 1U << 2,
+};
+
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+
+struct map_entry {
+ struct hlist_node node;
+ __u64 pair;
+ struct record val;
+};
+
+struct stats_record {
+ struct record rx_cnt;
+ struct record redir_err[XDP_REDIRECT_ERR_MAX];
+ struct record kthread;
+ struct record exception[XDP_ACTION_MAX];
+ struct record devmap_xmit;
+ DECLARE_HASHTABLE(xmit_map, 5);
+ struct record enq[];
+};
+
+struct sample_output {
+ struct {
+ __u64 rx;
+ __u64 redir;
+ __u64 drop;
+ __u64 drop_xmit;
+ __u64 err;
+ __u64 xmit;
+ } totals;
+ struct {
+ __u64 pps;
+ __u64 drop;
+ __u64 err;
+ } rx_cnt;
+ struct {
+ __u64 suc;
+ __u64 err;
+ } redir_cnt;
+ struct {
+ __u64 hits;
+ } except_cnt;
+ struct {
+ __u64 pps;
+ __u64 drop;
+ __u64 err;
+ double bavg;
+ } xmit_cnt;
+};
+
+struct xdp_desc {
+ int ifindex;
+ __u32 prog_id;
+ int flags;
+} sample_xdp_progs[32];
+
+struct datarec *sample_mmap[NUM_MAP];
+struct bpf_map *sample_map[NUM_MAP];
+size_t sample_map_count[NUM_MAP];
+enum log_level sample_log_level;
+struct sample_output sample_out;
+unsigned long sample_interval;
+bool sample_err_exp;
+int sample_xdp_cnt;
+int sample_n_cpus;
+int sample_sig_fd;
+int sample_mask;
+
+static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = {
+ /* Key=1 keeps unknown errors */
+ "Success",
+ "Unknown",
+ "EINVAL",
+ "ENETDOWN",
+ "EMSGSIZE",
+ "EOPNOTSUPP",
+ "ENOSPC",
+};
+
+/* Keyed from Unknown */
+static const char *xdp_redirect_err_help[XDP_REDIRECT_ERR_MAX - 1] = {
+ "Unknown error",
+ "Invalid redirection",
+ "Device being redirected to is down",
+ "Packet length too large for device",
+ "Operation not supported",
+ "No space in ptr_ring of cpumap kthread",
+};
+
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+ [XDP_UNKNOWN] = "XDP_UNKNOWN",
+};
+
+static __u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ return UINT64_MAX;
+ }
+ return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static const char *action2str(int action)
+{
+ if (action < XDP_ACTION_MAX)
+ return xdp_action_names[action];
+ return NULL;
+}
+
+static void sample_print_help(int mask)
+{
+ printf("Output format description\n\n"
+ "By default, redirect success statistics are disabled, use -s to enable.\n"
+ "The terse output mode is default, verbose mode can be activated using -v\n"
+ "Use SIGQUIT (Ctrl + \\) to switch the mode dynamically at runtime\n\n"
+ "Terse mode displays at most the following fields:\n"
+ " rx/s Number of packets received per second\n"
+ " redir/s Number of packets successfully redirected per second\n"
+ " err,drop/s Aggregated count of errors per second (including dropped packets)\n"
+ " xmit/s Number of packets transmitted on the output device per second\n\n"
+ "Output description for verbose mode:\n"
+ " FIELD DESCRIPTION\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ printf(" receive\t\tDisplays the number of packets received & errors encountered\n"
+ " \t\t\tWhenever an error or packet drop occurs, details of per CPU error\n"
+ " \t\t\tand drop statistics will be expanded inline in terse mode.\n"
+ " \t\t\t\tpkt/s - Packets received per second\n"
+ " \t\t\t\tdrop/s - Packets dropped per second\n"
+ " \t\t\t\terror/s - Errors encountered per second\n\n");
+ }
+ if (mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ printf(" redirect\t\tDisplays the number of packets successfully redirected\n"
+ " \t\t\tErrors encountered are expanded under redirect_err field\n"
+ " \t\t\tNote that passing -s to enable it has a per packet overhead\n"
+ " \t\t\t\tredir/s - Packets redirected successfully per second\n\n"
+ " redirect_err\t\tDisplays the number of packets that failed redirection\n"
+ " \t\t\tThe errno is expanded under this field with per CPU count\n"
+ " \t\t\tThe recognized errors are:\n");
+
+ for (int i = 2; i < XDP_REDIRECT_ERR_MAX; i++)
+ printf("\t\t\t %s: %s\n", xdp_redirect_err_names[i],
+ xdp_redirect_err_help[i - 1]);
+
+ printf(" \n\t\t\t\terror/s - Packets that failed redirection per second\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ printf(" enqueue to cpu N\tDisplays the number of packets enqueued to bulk queue of CPU N\n"
+ " \t\t\tExpands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N\n"
+ " \t\t\tReceived packets can be associated with the CPU redirect program is enqueuing \n"
+ " \t\t\tpackets to.\n"
+ " \t\t\t\tpkt/s - Packets enqueued per second from other CPU to CPU N\n"
+ " \t\t\t\tdrop/s - Packets dropped when trying to enqueue to CPU N\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ printf(" kthread\t\tDisplays the number of packets processed in CPUMAP kthread for each CPU\n"
+ " \t\t\tPackets consumed from ptr_ring in kthread, and its xdp_stats (after calling \n"
+ " \t\t\tCPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and\n"
+ " \t\t\tthen per-CPU to associate it to each CPU's pinned CPUMAP kthread.\n"
+ " \t\t\t\tpkt/s - Packets consumed per second from ptr_ring\n"
+ " \t\t\t\tdrop/s - Packets dropped per second in kthread\n"
+ " \t\t\t\tsched - Number of times kthread called schedule()\n\n"
+ " \t\t\txdp_stats (also expands to per-CPU counts)\n"
+ " \t\t\t\tpass/s - XDP_PASS count for CPUMAP program execution\n"
+ " \t\t\t\tdrop/s - XDP_DROP count for CPUMAP program execution\n"
+ " \t\t\t\tredir/s - XDP_REDIRECT count for CPUMAP program execution\n\n");
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ printf(" xdp_exception\t\tDisplays xdp_exception tracepoint events\n"
+ " \t\t\tThis can occur due to internal driver errors, unrecognized\n"
+ " \t\t\tXDP actions and due to explicit user trigger by use of XDP_ABORTED\n"
+ " \t\t\tEach action is expanded below this field with its count\n"
+ " \t\t\t\thit/s - Number of times the tracepoint was hit per second\n\n");
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ printf(" devmap_xmit\t\tDisplays devmap_xmit tracepoint events\n"
+ " \t\t\tThis tracepoint is invoked for successful transmissions on output\n"
+ " \t\t\tdevice but these statistics are not available for generic XDP mode,\n"
+ " \t\t\thence they will be omitted from the output when using SKB mode\n"
+ " \t\t\t\txmit/s - Number of packets that were transmitted per second\n"
+ " \t\t\t\tdrop/s - Number of packets that failed transmissions per second\n"
+ " \t\t\t\tdrv_err/s - Number of internal driver errors per second\n"
+ " \t\t\t\tbulk-avg - Average number of packets processed for each event\n\n");
+ }
+}
+
+void sample_usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error)
+{
+ int i;
+
+ if (!error)
+ sample_print_help(mask);
+
+ printf("\n%s\nOption for %s:\n", doc, argv[0]);
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-15s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value: %d)",
+ *long_options[i].flag);
+ else
+ printf("\t short-option: -%c", long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+ unsigned int nr_cpus = libbpf_num_possible_cpus();
+ struct datarec *array;
+
+ array = calloc(nr_cpus, sizeof(*array));
+ if (!array) {
+ fprintf(stderr, "Failed to allocate memory (nr_cpus: %u)\n",
+ nr_cpus);
+ return NULL;
+ }
+ return array;
+}
+
+static int map_entry_init(struct map_entry *e, __u64 pair)
+{
+ e->pair = pair;
+ INIT_HLIST_NODE(&e->node);
+ e->val.timestamp = gettime();
+ e->val.cpu = alloc_record_per_cpu();
+ if (!e->val.cpu)
+ return -ENOMEM;
+ return 0;
+}
+
+static void map_collect_percpu(struct datarec *values, struct record *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = libbpf_num_possible_cpus();
+ __u64 sum_xdp_redirect = 0;
+ __u64 sum_processed = 0;
+ __u64 sum_xdp_pass = 0;
+ __u64 sum_xdp_drop = 0;
+ __u64 sum_dropped = 0;
+ __u64 sum_issue = 0;
+ int i;
+
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = READ_ONCE(values[i].processed);
+ rec->cpu[i].dropped = READ_ONCE(values[i].dropped);
+ rec->cpu[i].issue = READ_ONCE(values[i].issue);
+ rec->cpu[i].xdp_pass = READ_ONCE(values[i].xdp_pass);
+ rec->cpu[i].xdp_drop = READ_ONCE(values[i].xdp_drop);
+ rec->cpu[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect);
+
+ sum_processed += rec->cpu[i].processed;
+ sum_dropped += rec->cpu[i].dropped;
+ sum_issue += rec->cpu[i].issue;
+ sum_xdp_pass += rec->cpu[i].xdp_pass;
+ sum_xdp_drop += rec->cpu[i].xdp_drop;
+ sum_xdp_redirect += rec->cpu[i].xdp_redirect;
+ }
+
+ rec->total.processed = sum_processed;
+ rec->total.dropped = sum_dropped;
+ rec->total.issue = sum_issue;
+ rec->total.xdp_pass = sum_xdp_pass;
+ rec->total.xdp_drop = sum_xdp_drop;
+ rec->total.xdp_redirect = sum_xdp_redirect;
+}
+
+static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ __u32 batch, count = 32;
+ struct datarec *values;
+ bool init = false;
+ __u64 *keys;
+ int i, ret;
+
+ keys = calloc(count, sizeof(__u64));
+ if (!keys)
+ return -ENOMEM;
+ values = calloc(count * nr_cpus, sizeof(struct datarec));
+ if (!values) {
+ free(keys);
+ return -ENOMEM;
+ }
+
+ for (;;) {
+ bool exit = false;
+
+ ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch,
+ keys, values, &count, NULL);
+ if (ret < 0 && errno != ENOENT)
+ break;
+ if (errno == ENOENT)
+ exit = true;
+
+ init = true;
+ for (i = 0; i < count; i++) {
+ struct map_entry *e, *x = NULL;
+ __u64 pair = keys[i];
+ struct datarec *arr;
+
+ arr = &values[i * nr_cpus];
+ hash_for_each_possible(rec->xmit_map, e, node, pair) {
+ if (e->pair == pair) {
+ x = e;
+ break;
+ }
+ }
+ if (!x) {
+ x = calloc(1, sizeof(*x));
+ if (!x)
+ goto cleanup;
+ if (map_entry_init(x, pair) < 0) {
+ free(x);
+ goto cleanup;
+ }
+ hash_add(rec->xmit_map, &x->node, pair);
+ }
+ map_collect_percpu(arr, &x->val);
+ }
+
+ if (exit)
+ break;
+ count = 32;
+ }
+
+ free(values);
+ free(keys);
+ return 0;
+cleanup:
+ free(values);
+ free(keys);
+ return -ENOMEM;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int i;
+
+ rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record));
+ if (!rec) {
+ fprintf(stderr, "Failed to allocate memory\n");
+ return NULL;
+ }
+
+ if (sample_mask & SAMPLE_RX_CNT) {
+ rec->rx_cnt.cpu = alloc_record_per_cpu();
+ if (!rec->rx_cnt.cpu) {
+ fprintf(stderr,
+ "Failed to allocate rx_cnt per-CPU array\n");
+ goto end_rec;
+ }
+ }
+ if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) {
+ rec->redir_err[i].cpu = alloc_record_per_cpu();
+ if (!rec->redir_err[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate redir_err per-CPU array for "
+ "\"%s\" case\n",
+ xdp_redirect_err_names[i]);
+ while (i--)
+ free(rec->redir_err[i].cpu);
+ goto end_rx_cnt;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ rec->kthread.cpu = alloc_record_per_cpu();
+ if (!rec->kthread.cpu) {
+ fprintf(stderr,
+ "Failed to allocate kthread per-CPU array\n");
+ goto end_redir;
+ }
+ }
+ if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ rec->exception[i].cpu = alloc_record_per_cpu();
+ if (!rec->exception[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate exception per-CPU array for "
+ "\"%s\" case\n",
+ action2str(i));
+ while (i--)
+ free(rec->exception[i].cpu);
+ goto end_kthread;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ rec->devmap_xmit.cpu = alloc_record_per_cpu();
+ if (!rec->devmap_xmit.cpu) {
+ fprintf(stderr,
+ "Failed to allocate devmap_xmit per-CPU array\n");
+ goto end_exception;
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ hash_init(rec->xmit_map);
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ for (i = 0; i < sample_n_cpus; i++) {
+ rec->enq[i].cpu = alloc_record_per_cpu();
+ if (!rec->enq[i].cpu) {
+ fprintf(stderr,
+ "Failed to allocate enqueue per-CPU array for "
+ "CPU %d\n",
+ i);
+ while (i--)
+ free(rec->enq[i].cpu);
+ goto end_devmap_xmit;
+ }
+ }
+ }
+
+ return rec;
+
+end_devmap_xmit:
+ free(rec->devmap_xmit.cpu);
+end_exception:
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(rec->exception[i].cpu);
+end_kthread:
+ free(rec->kthread.cpu);
+end_redir:
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(rec->redir_err[i].cpu);
+end_rx_cnt:
+ free(rec->rx_cnt.cpu);
+end_rec:
+ free(rec);
+ return NULL;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ struct hlist_node *tmp;
+ struct map_entry *e;
+ int i;
+
+ for (i = 0; i < sample_n_cpus; i++)
+ free(r->enq[i].cpu);
+ hash_for_each_safe(r->xmit_map, i, tmp, e, node) {
+ hash_del(&e->node);
+ free(e->val.cpu);
+ free(e);
+ }
+ free(r->devmap_xmit.cpu);
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(r->exception[i].cpu);
+ free(r->kthread.cpu);
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(r->redir_err[i].cpu);
+ free(r->rx_cnt.cpu);
+ free(r);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double)period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static double sample_round(double val)
+{
+ if (val - floor(val) < 0.5)
+ return floor(val);
+ return ceil(val);
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->processed - p->processed;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->dropped - p->dropped;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->issue - p->issue;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->info - p->info;
+ pps = sample_round(packets / period_);
+ }
+ return pps;
+}
+
+static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass,
+ double *xdp_drop, double *xdp_redirect, double period_)
+{
+ *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0;
+ if (period_ > 0) {
+ *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+ *xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+ *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+ }
+}
+
+static void stats_get_rx_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus, struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->rx_cnt;
+ prev = &stats_prev->rx_cnt;
+ t = calc_period(rec, prev);
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), ERR(err));
+ }
+
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->rx_cnt.pps = pps;
+ out->rx_cnt.drop = drop;
+ out->rx_cnt.err = err;
+ out->totals.rx += pps;
+ out->totals.drop += drop;
+ out->totals.err += err;
+ }
+}
+
+static void stats_get_cpumap_enqueue(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i, to_cpu;
+
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) {
+ rec = &stats_rec->enq[to_cpu];
+ prev = &stats_prev->enq[to_cpu];
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ if (pps > 0 || drop > 0) {
+ char str[64];
+
+ snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu);
+
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu);
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+ print_default(
+ " %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+ }
+}
+
+static void stats_get_cpumap_remote(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ double xdp_pass, xdp_drop, xdp_redirect;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ if (xdp_pass || xdp_drop || xdp_redirect) {
+ print_err(xdp_drop,
+ " %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ "xdp_stats", PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, &xdp_redirect, t);
+ if (!xdp_pass && !xdp_drop && !xdp_redirect)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+}
+
+static void stats_get_cpumap_kthread(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ print_err(drop, " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err,
+ "sched");
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), err, "sched");
+ }
+}
+
+static void stats_get_redirect_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, pps;
+ int i;
+
+ rec = &stats_rec->redir_err[0];
+ prev = &stats_prev->redir_err[0];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ if (!pps)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf "\n", str, REDIR(pps));
+ }
+
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ out->redir_cnt.suc = pps;
+ out->totals.redir += pps;
+ }
+}
+
+static void stats_get_redirect_err_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, drop, sum = 0;
+ int rec_i, i;
+
+ for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) {
+ char str[64];
+
+ rec = &stats_rec->redir_err[rec_i];
+ prev = &stats_prev->redir_err[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ if (drop > 0 && !out) {
+ snprintf(str, sizeof(str),
+ sample_log_level & LL_DEFAULT ? "%s total" :
+ "%s",
+ xdp_redirect_err_names[rec_i]);
+ print_err(drop, " %-18s " FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ sum += drop;
+ }
+
+ if (out) {
+ out->redir_cnt.err = sum;
+ out->totals.err += sum;
+ }
+}
+
+static void stats_get_exception_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double t, drop, sum = 0;
+ struct record *rec, *prev;
+ int rec_i, i;
+
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ rec = &stats_rec->exception[rec_i];
+ prev = &stats_prev->exception[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ /* Fold out errors after heading */
+ sum += drop;
+
+ if (drop > 0 && !out) {
+ print_always(" %-18s " FMT_COLUMNf "\n",
+ action2str(rec_i), ERR(drop));
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n",
+ str, ERR(drop));
+ }
+ }
+ }
+
+ if (out) {
+ out->except_cnt.hits = sum;
+ out->totals.err += sum;
+ }
+}
+
+static void stats_get_devmap_xmit(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out)
+{
+ double pps, drop, info, err;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->devmap_xmit;
+ prev = &stats_prev->devmap_xmit;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(r, p, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n",
+ str, XMIT(pps), DROP(drop), err, "drv_err/s",
+ info, "bulk-avg");
+ }
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ info = calc_info_pps(&rec->total, &prev->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->xmit_cnt.pps = pps;
+ out->xmit_cnt.drop = drop;
+ out->xmit_cnt.bavg = info;
+ out->xmit_cnt.err = err;
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ }
+}
+
+static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ unsigned int nr_cpus,
+ struct sample_output *out,
+ bool xmit_total)
+{
+ double pps, drop, info, err;
+ struct map_entry *entry;
+ struct record *r, *p;
+ double t;
+ int bkt;
+
+ hash_for_each(stats_rec->xmit_map, bkt, entry, node) {
+ struct map_entry *e, *x = NULL;
+ char ifname_from[IFNAMSIZ];
+ char ifname_to[IFNAMSIZ];
+ const char *fstr, *tstr;
+ unsigned long prev_time;
+ struct record beg = {};
+ __u32 from_idx, to_idx;
+ char str[128];
+ __u64 pair;
+ int i;
+
+ prev_time = sample_interval * NANOSEC_PER_SEC;
+
+ pair = entry->pair;
+ from_idx = pair >> 32;
+ to_idx = pair & 0xFFFFFFFF;
+
+ r = &entry->val;
+ beg.timestamp = r->timestamp - prev_time;
+
+ /* Find matching entry from stats_prev map */
+ hash_for_each_possible(stats_prev->xmit_map, e, node, pair) {
+ if (e->pair == pair) {
+ x = e;
+ break;
+ }
+ }
+ if (x)
+ p = &x->val;
+ else
+ p = &beg;
+ t = calc_period(r, p);
+ pps = calc_pps(&r->total, &p->total, t);
+ drop = calc_drop_pps(&r->total, &p->total, t);
+ info = calc_info_pps(&r->total, &p->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&r->total, &p->total, t);
+
+ if (out) {
+ /* We are responsible for filling out totals */
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ continue;
+ }
+
+ fstr = tstr = NULL;
+ if (if_indextoname(from_idx, ifname_from))
+ fstr = ifname_from;
+ if (if_indextoname(to_idx, ifname_to))
+ tstr = ifname_to;
+
+ snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?",
+ tstr ?: "?");
+ /* Skip idle streams of redirection */
+ if (pps || drop || err) {
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop),
+ err, "drv_err/s", info, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *rc = &r->cpu[i];
+ struct datarec *pc, p_beg = {};
+ char str[64];
+
+ pc = p == &beg ? &p_beg : &p->cpu[i];
+
+ pps = calc_pps(rc, pc, t);
+ drop = calc_drop_pps(rc, pc, t);
+ err = calc_errs_pps(rc, pc, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(rc, pc, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps),
+ DROP(drop), err, "drv_err/s", info, "bulk-avg");
+ }
+ }
+}
+
+static void stats_print(const char *prefix, int mask, struct stats_record *r,
+ struct stats_record *p, struct sample_output *out)
+{
+ int nr_cpus = libbpf_num_possible_cpus();
+ const char *str;
+
+ print_always("%-23s", prefix ?: "Summary");
+ if (mask & SAMPLE_RX_CNT)
+ print_always(FMT_COLUMNl, RX(out->totals.rx));
+ if (mask & SAMPLE_REDIRECT_CNT)
+ print_always(FMT_COLUMNl, REDIR(out->totals.redir));
+ printf(FMT_COLUMNl,
+ out->totals.err + out->totals.drop + out->totals.drop_xmit,
+ "err,drop/s");
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT ||
+ mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ printf(FMT_COLUMNl, XMIT(out->totals.xmit));
+ printf("\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ?
+ "receive total" :
+ "receive";
+ print_err((out->rx_cnt.err || out->rx_cnt.drop),
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n",
+ str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop),
+ ERR(out->rx_cnt.err));
+
+ stats_get_rx_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ stats_get_cpumap_enqueue(r, p, nr_cpus);
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ stats_get_cpumap_kthread(r, p, nr_cpus);
+ stats_get_cpumap_remote(r, p, nr_cpus);
+ }
+
+ if (mask & SAMPLE_REDIRECT_CNT) {
+ str = out->redir_cnt.suc ? "redirect total" : "redirect";
+ print_default(" %-20s " FMT_COLUMNl "\n", str,
+ REDIR(out->redir_cnt.suc));
+
+ stats_get_redirect_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ?
+ "redirect_err total" :
+ "redirect_err";
+ print_err(out->redir_cnt.err, " %-20s " FMT_COLUMNl "\n", str,
+ ERR(out->redir_cnt.err));
+
+ stats_get_redirect_err_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ str = out->except_cnt.hits ? "xdp_exception total" :
+ "xdp_exception";
+
+ print_err(out->except_cnt.hits, " %-20s " FMT_COLUMNl "\n", str,
+ HITS(out->except_cnt.hits));
+
+ stats_get_exception_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ?
+ "devmap_xmit total" :
+ "devmap_xmit";
+
+ print_err(out->xmit_cnt.err || out->xmit_cnt.drop,
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl
+ __COLUMN(".2f") "\n",
+ str, XMIT(out->xmit_cnt.pps),
+ DROP(out->xmit_cnt.drop), out->xmit_cnt.err,
+ "drv_err/s", out->xmit_cnt.bavg, "bulk-avg");
+
+ stats_get_devmap_xmit(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+
+ if (sample_log_level & LL_DEFAULT ||
+ ((sample_log_level & LL_SIMPLE) && sample_err_exp)) {
+ sample_err_exp = false;
+ printf("\n");
+ }
+}
+
+int sample_setup_maps(struct bpf_map **maps)
+{
+ sample_n_cpus = libbpf_num_possible_cpus();
+
+ for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+ sample_map[i] = maps[i];
+
+ switch (i) {
+ case MAP_RX:
+ case MAP_CPUMAP_KTHREAD:
+ case MAP_DEVMAP_XMIT:
+ sample_map_count[i] = sample_n_cpus;
+ break;
+ case MAP_REDIRECT_ERR:
+ sample_map_count[i] =
+ XDP_REDIRECT_ERR_MAX * sample_n_cpus;
+ break;
+ case MAP_EXCEPTION:
+ sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus;
+ case MAP_CPUMAP_ENQUEUE:
+ sample_map_count[i] = sample_n_cpus * sample_n_cpus;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (bpf_map__resize(sample_map[i], sample_map_count[i]) < 0)
+ return -errno;
+ }
+ sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
+ return 0;
+}
+
+static int sample_setup_maps_mappings(void)
+{
+ for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+ size_t size = sample_map_count[i] * sizeof(struct datarec);
+
+ sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, bpf_map__fd(sample_map[i]), 0);
+ if (sample_mmap[i] == MAP_FAILED)
+ return -errno;
+ }
+ return 0;
+}
+
+int __sample_init(int mask)
+{
+ sigset_t st;
+
+ sigemptyset(&st);
+ sigaddset(&st, SIGQUIT);
+ sigaddset(&st, SIGINT);
+ sigaddset(&st, SIGTERM);
+
+ if (sigprocmask(SIG_BLOCK, &st, NULL) < 0)
+ return -errno;
+
+ sample_sig_fd = signalfd(-1, &st, SFD_CLOEXEC | SFD_NONBLOCK);
+ if (sample_sig_fd < 0)
+ return -errno;
+
+ sample_mask = mask;
+
+ return sample_setup_maps_mappings();
+}
+
+static int __sample_remove_xdp(int ifindex, __u32 prog_id, int xdp_flags)
+{
+ __u32 cur_prog_id = 0;
+ int ret;
+
+ if (prog_id) {
+ ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags);
+ if (ret < 0)
+ return -errno;
+
+ if (prog_id != cur_prog_id) {
+ print_always(
+ "Program on ifindex %d does not match installed "
+ "program, skipping unload\n",
+ ifindex);
+ return -ENOENT;
+ }
+ }
+
+ return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+}
+
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+ bool force)
+{
+ int ret, xdp_flags = 0;
+ __u32 prog_id = 0;
+
+ if (sample_xdp_cnt == 32) {
+ fprintf(stderr,
+ "Total limit for installed XDP programs in a sample reached\n");
+ return -ENOTSUP;
+ }
+
+ xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0;
+ xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+ ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog),
+ xdp_flags);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to install program \"%s\" on ifindex %d, mode = %s, "
+ "force = %s: %s\n",
+ bpf_program__name(xdp_prog), ifindex,
+ generic ? "skb" : "native", force ? "true" : "false",
+ strerror(-ret));
+ return ret;
+ }
+
+ ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to get XDP program id for ifindex %d, removing program: %s\n",
+ ifindex, strerror(errno));
+ __sample_remove_xdp(ifindex, 0, xdp_flags);
+ return ret;
+ }
+ sample_xdp_progs[sample_xdp_cnt++] =
+ (struct xdp_desc){ ifindex, prog_id, xdp_flags };
+
+ return 0;
+}
+
+static void sample_summary_print(void)
+{
+ double period = sample_out.rx_cnt.pps;
+
+ if (sample_out.totals.rx) {
+ double pkts = sample_out.totals.rx;
+
+ print_always(" Packets received : %'-10llu\n",
+ sample_out.totals.rx);
+ print_always(" Average packets/s : %'-10.0f\n",
+ sample_round(pkts / period));
+ }
+ if (sample_out.totals.redir) {
+ double pkts = sample_out.totals.redir;
+
+ print_always(" Packets redirected : %'-10llu\n",
+ sample_out.totals.redir);
+ print_always(" Average redir/s : %'-10.0f\n",
+ sample_round(pkts / period));
+ }
+ if (sample_out.totals.drop)
+ print_always(" Rx dropped : %'-10llu\n",
+ sample_out.totals.drop);
+ if (sample_out.totals.drop_xmit)
+ print_always(" Tx dropped : %'-10llu\n",
+ sample_out.totals.drop_xmit);
+ if (sample_out.totals.err)
+ print_always(" Errors recorded : %'-10llu\n",
+ sample_out.totals.err);
+ if (sample_out.totals.xmit) {
+ double pkts = sample_out.totals.xmit;
+
+ print_always(" Packets transmitted : %'-10llu\n",
+ sample_out.totals.xmit);
+ print_always(" Average transmit/s : %'-10.0f\n",
+ sample_round(pkts / period));
+ }
+}
+
+void sample_exit(int status)
+{
+ size_t size;
+
+ for (int i = 0; i < NUM_MAP; i++) {
+ size = sample_map_count[i] * sizeof(**sample_mmap);
+ munmap(sample_mmap[i], size);
+ }
+ while (sample_xdp_cnt--) {
+ int i = sample_xdp_cnt, ifindex, xdp_flags;
+ __u32 prog_id;
+
+ prog_id = sample_xdp_progs[i].prog_id;
+ ifindex = sample_xdp_progs[i].ifindex;
+ xdp_flags = sample_xdp_progs[i].flags;
+
+ __sample_remove_xdp(ifindex, prog_id, xdp_flags);
+ }
+ sample_summary_print();
+ close(sample_sig_fd);
+ exit(status);
+}
+
+static int sample_stats_collect(struct stats_record *rec)
+{
+ int i;
+
+ if (sample_mask & SAMPLE_RX_CNT)
+ map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt);
+
+ if (sample_mask & SAMPLE_REDIRECT_CNT)
+ map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR], &rec->redir_err[0]);
+
+ if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) {
+ for (i = 1; i < XDP_REDIRECT_ERR_MAX; i++)
+ map_collect_percpu(&sample_mmap[MAP_REDIRECT_ERR][i * sample_n_cpus],
+ &rec->redir_err[i]);
+ }
+
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ for (i = 0; i < sample_n_cpus; i++)
+ map_collect_percpu(&sample_mmap[MAP_CPUMAP_ENQUEUE][i * sample_n_cpus],
+ &rec->enq[i]);
+
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT)
+ map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD],
+ &rec->kthread);
+
+ if (sample_mask & SAMPLE_EXCEPTION_CNT)
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus],
+ &rec->exception[i]);
+
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT)
+ map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT], &rec->devmap_xmit);
+
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
+ if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void sample_summary_update(struct sample_output *out, int interval)
+{
+ sample_out.totals.rx += out->totals.rx;
+ sample_out.totals.redir += out->totals.redir;
+ sample_out.totals.drop += out->totals.drop;
+ sample_out.totals.drop_xmit += out->totals.drop_xmit;
+ sample_out.totals.err += out->totals.err;
+ sample_out.totals.xmit += out->totals.xmit;
+ sample_out.rx_cnt.pps += interval;
+}
+
+static void sample_stats_print(int mask, struct stats_record *cur,
+ struct stats_record *prev, char *prog_name,
+ int interval)
+{
+ struct sample_output out = {};
+
+ if (mask & SAMPLE_RX_CNT)
+ stats_get_rx_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_REDIRECT_CNT)
+ stats_get_redirect_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_REDIRECT_ERR_CNT)
+ stats_get_redirect_err_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_EXCEPTION_CNT)
+ stats_get_exception_cnt(cur, prev, 0, &out);
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT)
+ stats_get_devmap_xmit(cur, prev, 0, &out);
+ else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(cur, prev, 0, &out,
+ mask & SAMPLE_DEVMAP_XMIT_CNT);
+ sample_summary_update(&out, interval);
+
+ stats_print(prog_name, mask, cur, prev, &out);
+}
+
+void sample_switch_mode(void)
+{
+ sample_log_level ^= LL_DEBUG - 1;
+}
+
+static int sample_signal_cb(void)
+{
+ struct signalfd_siginfo si;
+ int r;
+
+ r = read(sample_sig_fd, &si, sizeof(si));
+ if (r < 0)
+ return -errno;
+
+ switch (si.ssi_signo) {
+ case SIGQUIT:
+ sample_switch_mode();
+ printf("\n");
+ break;
+ default:
+ printf("\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Pointer swap trick */
+static void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static int sample_timer_cb(int timerfd, struct stats_record **rec,
+ struct stats_record **prev, int interval)
+{
+ char line[64] = "Summary";
+ int ret;
+ __u64 t;
+
+ ret = read(timerfd, &t, sizeof(t));
+ if (ret < 0)
+ return -errno;
+
+ swap(prev, rec);
+ ret = sample_stats_collect(*rec);
+ if (ret < 0)
+ return ret;
+
+ if (sample_xdp_cnt == 2 && !(sample_mask & SAMPLE_SKIP_HEADING)) {
+ char fi[IFNAMSIZ];
+ char to[IFNAMSIZ];
+ const char *f, *t;
+
+ f = t = NULL;
+ if (if_indextoname(sample_xdp_progs[0].ifindex, fi))
+ f = fi;
+ if (if_indextoname(sample_xdp_progs[1].ifindex, to))
+ t = to;
+
+ snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
+ }
+
+ sample_stats_print(sample_mask, *rec, *prev, line, interval);
+ return 0;
+}
+
+int sample_run(int interval, void (*post_cb)(void *), void *ctx)
+{
+ struct timespec ts = { interval, 0 };
+ struct itimerspec its = { ts, ts };
+ struct stats_record *rec, *prev;
+ struct pollfd pfd[2] = {};
+ int timerfd, ret;
+
+ if (!interval) {
+ fprintf(stderr, "Incorrect interval 0\n");
+ return -EINVAL;
+ }
+ sample_interval = interval;
+ /* Pretty print numbers */
+ setlocale(LC_NUMERIC, "en_US.UTF-8");
+
+ timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+ if (timerfd < 0)
+ return -errno;
+ timerfd_settime(timerfd, 0, &its, NULL);
+
+ pfd[0].fd = sample_sig_fd;
+ pfd[0].events = POLLIN;
+
+ pfd[1].fd = timerfd;
+ pfd[1].events = POLLIN;
+
+ ret = -ENOMEM;
+ rec = alloc_stats_record();
+ if (!rec)
+ goto end;
+ prev = alloc_stats_record();
+ if (!prev)
+ goto end_rec;
+
+ ret = sample_stats_collect(rec);
+ if (ret < 0)
+ goto end_rec_prev;
+
+ for (;;) {
+ ret = poll(pfd, 2, -1);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ else
+ break;
+ }
+
+ if (pfd[0].revents & POLLIN)
+ ret = sample_signal_cb();
+ else if (pfd[1].revents & POLLIN)
+ ret = sample_timer_cb(timerfd, &rec, &prev, interval);
+
+ if (ret)
+ break;
+
+ if (post_cb)
+ post_cb(ctx);
+ }
+
+end_rec_prev:
+ free_stats_record(prev);
+end_rec:
+ free_stats_record(rec);
+end:
+ close(timerfd);
+
+ return ret;
+}
+
+const char *get_driver_name(int ifindex)
+{
+ struct ethtool_drvinfo drv = {};
+ char ifname[IF_NAMESIZE];
+ static char drvname[32];
+ struct ifreq ifr = {};
+ int fd, r = 0;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return "[error]";
+
+ if (!if_indextoname(ifindex, ifname))
+ goto end;
+
+ drv.cmd = ETHTOOL_GDRVINFO;
+ safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+ ifr.ifr_data = (void *)&drv;
+
+ r = ioctl(fd, SIOCETHTOOL, &ifr);
+ if (r)
+ goto end;
+
+ safe_strncpy(drvname, drv.driver, sizeof(drvname));
+
+ close(fd);
+ return drvname;
+
+end:
+ r = errno;
+ close(fd);
+ return r == EOPNOTSUPP ? "loopback" : "[error]";
+}
+
+int get_mac_addr(int ifindex, void *mac_addr)
+{
+ char ifname[IF_NAMESIZE];
+ struct ifreq ifr = {};
+ int fd, r;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (!if_indextoname(ifindex, ifname)) {
+ r = -errno;
+ goto end;
+ }
+
+ safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ r = ioctl(fd, SIOCGIFHWADDR, &ifr);
+ if (r) {
+ r = -errno;
+ goto end;
+ }
+
+ memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+
+end:
+ close(fd);
+ return r;
+}
+
+__attribute__((constructor)) static void sample_ctor(void)
+{
+ if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL) < 0) {
+ fprintf(stderr, "Failed to set libbpf strict mode: %s\n",
+ strerror(errno));
+ /* Just exit, nothing to cleanup right now */
+ exit(EXIT_FAIL_BPF);
+ }
+}
diff --git a/samples/bpf/xdp_sample_user.h b/samples/bpf/xdp_sample_user.h
new file mode 100644
index 000000000000..d97465ff8c62
--- /dev/null
+++ b/samples/bpf/xdp_sample_user.h
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef XDP_SAMPLE_USER_H
+#define XDP_SAMPLE_USER_H
+
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "xdp_sample_shared.h"
+
+enum stats_mask {
+ _SAMPLE_REDIRECT_MAP = 1U << 0,
+ SAMPLE_RX_CNT = 1U << 1,
+ SAMPLE_REDIRECT_ERR_CNT = 1U << 2,
+ SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3,
+ SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4,
+ SAMPLE_EXCEPTION_CNT = 1U << 5,
+ SAMPLE_DEVMAP_XMIT_CNT = 1U << 6,
+ SAMPLE_REDIRECT_CNT = 1U << 7,
+ SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8,
+ SAMPLE_SKIP_HEADING = 1U << 9,
+};
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+int sample_setup_maps(struct bpf_map **maps);
+int __sample_init(int mask);
+void sample_exit(int status);
+int sample_run(int interval, void (*post_cb)(void *), void *ctx);
+
+void sample_switch_mode(void);
+int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
+ bool force);
+void sample_usage(char *argv[], const struct option *long_options,
+ const char *doc, int mask, bool error);
+
+const char *get_driver_name(int ifindex);
+int get_mac_addr(int ifindex, void *mac_addr);
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+__attribute__((unused))
+static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+{
+ if (!size)
+ return dst;
+ strncpy(dst, src, size - 1);
+ dst[size - 1] = '\0';
+ return dst;
+}
+#pragma GCC diagnostic pop
+
+#define __attach_tp(name) \
+ ({ \
+ if (!bpf_program__is_tracing(skel->progs.name)) \
+ return -EINVAL; \
+ skel->links.name = bpf_program__attach(skel->progs.name); \
+ if (!skel->links.name) \
+ return -errno; \
+ })
+
+#define sample_init_pre_load(skel) \
+ ({ \
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \
+ sample_setup_maps((struct bpf_map *[]){ \
+ skel->maps.rx_cnt, skel->maps.redir_err_cnt, \
+ skel->maps.cpumap_enqueue_cnt, \
+ skel->maps.cpumap_kthread_cnt, \
+ skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \
+ skel->maps.devmap_xmit_cnt_multi }); \
+ })
+
+#define DEFINE_SAMPLE_INIT(name) \
+ static int sample_init(struct name *skel, int mask) \
+ { \
+ int ret; \
+ ret = __sample_init(mask); \
+ if (ret < 0) \
+ return ret; \
+ if (mask & SAMPLE_REDIRECT_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map); \
+ if (mask & SAMPLE_REDIRECT_CNT) \
+ __attach_tp(tp_xdp_redirect); \
+ if (mask & SAMPLE_REDIRECT_ERR_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map_err); \
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) \
+ __attach_tp(tp_xdp_redirect_err); \
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) \
+ __attach_tp(tp_xdp_cpumap_enqueue); \
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) \
+ __attach_tp(tp_xdp_cpumap_kthread); \
+ if (mask & SAMPLE_EXCEPTION_CNT) \
+ __attach_tp(tp_xdp_exception); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) \
+ __attach_tp(tp_xdp_devmap_xmit); \
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \
+ __attach_tp(tp_xdp_devmap_xmit_multi); \
+ return 0; \
+ }
+
+#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2db6925e04f4..791f31dd0abe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
- __u32 attach_type; /* program attach type */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
union bpf_iter_link_info {
@@ -993,6 +993,7 @@ enum bpf_attach_type {
BPF_SK_SKB_VERDICT,
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+ BPF_PERF_EVENT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1006,6 +1007,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
+ BPF_LINK_TYPE_PERF_EVENT = 7,
MAX_BPF_LINK_TYPE,
};
@@ -1446,6 +1448,13 @@ union bpf_attr {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
};
+ struct {
+ /* black box user-provided value passed through
+ * to BPF program at the execution time and
+ * accessible through bpf_get_attach_cookie() BPF helper
+ */
+ __u64 bpf_cookie;
+ } perf_event;
};
} link_create;
@@ -4847,6 +4856,27 @@ union bpf_attr {
* Get address of the traced function (for tracing and kprobe programs).
* Return
* Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ * Description
+ * Get bpf_cookie value provided (optionally) during the program
+ * attachment. It might be different for each individual
+ * attachment, even if BPF program itself is the same.
+ * Expects BPF program context *ctx* as a first argument.
+ *
+ * Supported for the following program types:
+ * - kprobe/uprobe;
+ * - tracepoint;
+ * - perf_event.
+ * Return
+ * Value specified by user at BPF link creation/attachment time
+ * or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ * Description
+ * Get the struct pt_regs associated with **task**.
+ * Return
+ * A pointer to struct pt_regs.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5023,6 +5053,8 @@ union bpf_attr {
FN(timer_start), \
FN(timer_cancel), \
FN(get_func_ip), \
+ FN(get_attach_cookie), \
+ FN(task_pt_regs), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h
index c86c3e942df9..47afae3895ec 100644
--- a/tools/include/uapi/linux/ethtool.h
+++ b/tools/include/uapi/linux/ethtool.h
@@ -48,4 +48,57 @@ struct ethtool_channels {
__u32 combined_count;
};
+#define ETHTOOL_FWVERS_LEN 32
+#define ETHTOOL_BUSINFO_LEN 32
+#define ETHTOOL_EROMVERS_LEN 32
+
+/**
+ * struct ethtool_drvinfo - general driver and device information
+ * @cmd: Command number = %ETHTOOL_GDRVINFO
+ * @driver: Driver short name. This should normally match the name
+ * in its bus driver structure (e.g. pci_driver::name). Must
+ * not be an empty string.
+ * @version: Driver version string; may be an empty string
+ * @fw_version: Firmware version string; may be an empty string
+ * @erom_version: Expansion ROM version string; may be an empty string
+ * @bus_info: Device bus address. This should match the dev_name()
+ * string for the underlying bus device, if there is one. May be
+ * an empty string.
+ * @reserved2: Reserved for future use; see the note on reserved space.
+ * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
+ * %ETHTOOL_SPFLAGS commands; also the number of strings in the
+ * %ETH_SS_PRIV_FLAGS set
+ * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
+ * command; also the number of strings in the %ETH_SS_STATS set
+ * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
+ * command; also the number of strings in the %ETH_SS_TEST set
+ * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
+ * and %ETHTOOL_SEEPROM commands, in bytes
+ * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
+ * command, in bytes
+ *
+ * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
+ * strings in any string set (from Linux 2.6.34).
+ *
+ * Drivers should set at most @driver, @version, @fw_version and
+ * @bus_info in their get_drvinfo() implementation. The ethtool
+ * core fills in the other fields using other driver operations.
+ */
+struct ethtool_drvinfo {
+ __u32 cmd;
+ char driver[32];
+ char version[32];
+ char fw_version[ETHTOOL_FWVERS_LEN];
+ char bus_info[ETHTOOL_BUSINFO_LEN];
+ char erom_version[ETHTOOL_EROMVERS_LEN];
+ char reserved2[12];
+ __u32 n_priv_flags;
+ __u32 n_stats;
+ __u32 testinfo_len;
+ __u32 eedump_len;
+ __u32 regdump_len;
+};
+
+#define ETHTOOL_GDRVINFO 0x00000003
+
#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index ec14aa725bb0..74c3b73a5fbe 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -4,8 +4,9 @@
RM ?= rm
srctree = $(abs_srctree)
+VERSION_SCRIPT := libbpf.map
LIBBPF_VERSION := $(shell \
- grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
+ grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \
sort -rV | head -n1 | cut -d'_' -f2)
LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
@@ -110,7 +111,6 @@ SHARED_OBJDIR := $(OUTPUT)sharedobjs/
STATIC_OBJDIR := $(OUTPUT)staticobjs/
BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o
BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o
-VERSION_SCRIPT := libbpf.map
BPF_HELPER_DEFS := $(OUTPUT)bpf_helper_defs.h
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
@@ -163,10 +163,10 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
$(QUIET_LINK)$(CC) $(LDFLAGS) \
--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
- -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@
+ -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
@@ -181,7 +181,7 @@ $(OUTPUT)libbpf.pc:
check: check_abi
-check_abi: $(OUTPUT)libbpf.so
+check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \
echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \
"($(GLOBAL_SYM_COUNT)) does NOT match with num of" \
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 86dcac44f32f..2401fad090c5 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -684,8 +684,13 @@ int bpf_link_create(int prog_fd, int target_fd,
iter_info_len = OPTS_GET(opts, iter_info_len, 0);
target_btf_id = OPTS_GET(opts, target_btf_id, 0);
- if (iter_info_len && target_btf_id)
- return libbpf_err(-EINVAL);
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (iter_info_len || target_btf_id) {
+ if (iter_info_len && target_btf_id)
+ return libbpf_err(-EINVAL);
+ if (!OPTS_ZEROED(opts, target_btf_id))
+ return libbpf_err(-EINVAL);
+ }
memset(&attr, 0, sizeof(attr));
attr.link_create.prog_fd = prog_fd;
@@ -693,14 +698,27 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.attach_type = attach_type;
attr.link_create.flags = OPTS_GET(opts, flags, 0);
- if (iter_info_len) {
- attr.link_create.iter_info =
- ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
- attr.link_create.iter_info_len = iter_info_len;
- } else if (target_btf_id) {
+ if (target_btf_id) {
attr.link_create.target_btf_id = target_btf_id;
+ goto proceed;
}
+ switch (attach_type) {
+ case BPF_TRACE_ITER:
+ attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = iter_info_len;
+ break;
+ case BPF_PERF_EVENT:
+ attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
+ if (!OPTS_ZEROED(opts, perf_event))
+ return libbpf_err(-EINVAL);
+ break;
+ default:
+ if (!OPTS_ZEROED(opts, flags))
+ return libbpf_err(-EINVAL);
+ break;
+ }
+proceed:
fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
return libbpf_err_errno(fd);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 4f758f8f50cd..6fffb3cdf39b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -177,8 +177,14 @@ struct bpf_link_create_opts {
union bpf_iter_link_info *iter_info;
__u32 iter_info_len;
__u32 target_btf_id;
+ union {
+ struct {
+ __u64 bpf_cookie;
+ } perf_event;
+ };
+ size_t :0;
};
-#define bpf_link_create_opts__last_field target_btf_id
+#define bpf_link_create_opts__last_field perf_event
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index cb106e8c42cb..88d8825fc6f6 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -193,6 +193,8 @@ enum kern_feature_id {
FEAT_MODULE_BTF,
/* BTF_KIND_FLOAT support */
FEAT_BTF_FLOAT,
+ /* BPF perf link support */
+ FEAT_PERF_LINK,
__FEAT_CNT,
};
@@ -4337,6 +4339,37 @@ static int probe_module_btf(void)
return !err;
}
+static int probe_perf_link(void)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, link_fd, err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+ prog_fd = bpf_load_program_xattr(&attr, NULL, 0);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* use invalid perf_event FD to get EBADF, if link is supported;
+ * otherwise EINVAL should be returned
+ */
+ link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -4387,6 +4420,9 @@ static struct kern_feature_desc {
[FEAT_BTF_FLOAT] = {
"BTF_KIND_FLOAT support", probe_kern_btf_float,
},
+ [FEAT_PERF_LINK] = {
+ "BPF perf link support", probe_perf_link,
+ },
};
static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -5277,11 +5313,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
insn[1].imm = ext->kcfg.data_off;
} else /* EXT_KSYM */ {
- if (ext->ksym.type_id) { /* typed ksyms */
+ if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
insn[0].imm = ext->ksym.kernel_btf_id;
insn[1].imm = ext->ksym.kernel_btf_obj_fd;
- } else { /* typeless ksyms */
+ } else { /* typeless ksyms or unresolved typed ksyms */
insn[0].imm = (__u32)ext->ksym.addr;
insn[1].imm = ext->ksym.addr >> 32;
}
@@ -6608,11 +6644,8 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
break;
}
}
- if (id <= 0) {
- pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
- __btf_kind_str(kind), ksym_name);
+ if (id <= 0)
return -ESRCH;
- }
*res_btf = btf;
*res_btf_fd = btf_fd;
@@ -6629,8 +6662,13 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
struct btf *btf = NULL;
id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
- if (id < 0)
+ if (id == -ESRCH && ext->is_weak) {
+ return 0;
+ } else if (id < 0) {
+ pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
+ ext->name);
return id;
+ }
/* find local type_id */
local_type_id = ext->ksym.type_id;
@@ -8808,7 +8846,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_link {
int (*detach)(struct bpf_link *link);
- int (*destroy)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
char *pin_path; /* NULL, if not pinned */
int fd; /* hook FD, -1 if not applicable */
bool disconnected;
@@ -8847,11 +8885,12 @@ int bpf_link__destroy(struct bpf_link *link)
if (!link->disconnected && link->detach)
err = link->detach(link);
- if (link->destroy)
- link->destroy(link);
if (link->pin_path)
free(link->pin_path);
- free(link);
+ if (link->dealloc)
+ link->dealloc(link);
+ else
+ free(link);
return libbpf_err(err);
}
@@ -8948,23 +8987,42 @@ int bpf_link__unpin(struct bpf_link *link)
return 0;
}
-static int bpf_link__detach_perf_event(struct bpf_link *link)
+struct bpf_link_perf {
+ struct bpf_link link;
+ int perf_event_fd;
+};
+
+static int bpf_link_perf_detach(struct bpf_link *link)
{
- int err;
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+ int err = 0;
- err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
- if (err)
+ if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
err = -errno;
+ if (perf_link->perf_event_fd != link->fd)
+ close(perf_link->perf_event_fd);
close(link->fd);
+
return libbpf_err(err);
}
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+static void bpf_link_perf_dealloc(struct bpf_link *link)
+{
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+ free(perf_link);
+}
+
+struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link *link;
- int prog_fd, err;
+ struct bpf_link_perf *link;
+ int prog_fd, link_fd = -1, err;
+
+ if (!OPTS_VALID(opts, bpf_perf_event_opts))
+ return libbpf_err_ptr(-EINVAL);
if (pfd < 0) {
pr_warn("prog '%s': invalid perf event FD %d\n",
@@ -8981,27 +9039,59 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pf
link = calloc(1, sizeof(*link));
if (!link)
return libbpf_err_ptr(-ENOMEM);
- link->detach = &bpf_link__detach_perf_event;
- link->fd = pfd;
+ link->link.detach = &bpf_link_perf_detach;
+ link->link.dealloc = &bpf_link_perf_dealloc;
+ link->perf_event_fd = pfd;
- if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
- err = -errno;
- free(link);
- pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
- prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- if (err == -EPROTO)
- pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
- prog->name, pfd);
- return libbpf_err_ptr(err);
+ if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
+ .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
+
+ link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
+ if (link_fd < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
+ prog->name, pfd,
+ err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_out;
+ }
+ link->link.fd = link_fd;
+ } else {
+ if (OPTS_GET(opts, bpf_cookie, 0)) {
+ pr_warn("prog '%s': user context value is not supported\n", prog->name);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ if (err == -EPROTO)
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ prog->name, pfd);
+ goto err_out;
+ }
+ link->link.fd = pfd;
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
- free(link);
- pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+ pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return libbpf_err_ptr(err);
+ goto err_out;
}
- return link;
+
+ return &link->link;
+err_out:
+ if (link_fd >= 0)
+ close(link_fd);
+ free(link);
+ return libbpf_err_ptr(err);
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+{
+ return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
}
/*
@@ -9062,13 +9152,19 @@ static int determine_uprobe_retprobe_bit(void)
return parse_uint_from_file(file, "config:%d\n");
}
+#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
+#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
+
static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
- uint64_t offset, int pid)
+ uint64_t offset, int pid, size_t ref_ctr_off)
{
struct perf_event_attr attr = {};
char errmsg[STRERR_BUFSIZE];
int type, pfd, err;
+ if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
+ return -EINVAL;
+
type = uprobe ? determine_uprobe_perf_type()
: determine_kprobe_perf_type();
if (type < 0) {
@@ -9091,6 +9187,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
}
attr.size = sizeof(attr);
attr.type = type;
+ attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
attr.config2 = offset; /* kprobe_addr or probe_offset */
@@ -9112,8 +9209,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
struct bpf_link *
bpf_program__attach_kprobe_opts(struct bpf_program *prog,
const char *func_name,
- struct bpf_kprobe_opts *opts)
+ const struct bpf_kprobe_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
unsigned long offset;
@@ -9125,16 +9223,17 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog,
retprobe = OPTS_GET(opts, retprobe, false);
offset = OPTS_GET(opts, offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
- offset, -1 /* pid */);
+ offset, -1 /* pid */, 0 /* ref_ctr_off */);
if (pfd < 0) {
pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -9189,17 +9288,27 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
return link;
}
-struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
- bool retprobe, pid_t pid,
- const char *binary_path,
- size_t func_offset)
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
+ size_t ref_ctr_off;
int pfd, err;
+ bool retprobe;
+
+ if (!OPTS_VALID(opts, bpf_uprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
- pfd = perf_event_open_probe(true /* uprobe */, retprobe,
- binary_path, func_offset, pid);
+ retprobe = OPTS_GET(opts, retprobe, false);
+ ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+ pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
+ func_offset, pid, ref_ctr_off);
if (pfd < 0) {
pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
@@ -9207,7 +9316,7 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -9220,6 +9329,16 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+ bool retprobe, pid_t pid,
+ const char *binary_path,
+ size_t func_offset)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
+
+ return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
+}
+
static int determine_tracepoint_id(const char *tp_category,
const char *tp_name)
{
@@ -9270,14 +9389,21 @@ static int perf_event_open_tracepoint(const char *tp_category,
return pfd;
}
-struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
- const char *tp_category,
- const char *tp_name)
+struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
int pfd, err;
+ if (!OPTS_VALID(opts, bpf_tracepoint_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
pfd = perf_event_open_tracepoint(tp_category, tp_name);
if (pfd < 0) {
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
@@ -9285,7 +9411,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -9297,6 +9423,13 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name)
+{
+ return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
+}
+
static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1271d99bb7aa..f177d897c5f7 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -104,17 +104,6 @@ struct bpf_object_open_opts {
};
#define bpf_object_open_opts__last_field btf_custom_path
-struct bpf_kprobe_opts {
- /* size of this struct, for forward/backward compatiblity */
- size_t sz;
- /* function's offset to install kprobe to */
- unsigned long offset;
- /* kprobe is return probe */
- bool retprobe;
- size_t :0;
-};
-#define bpf_kprobe_opts__last_field retprobe
-
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
LIBBPF_API struct bpf_object *
bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts);
@@ -255,24 +244,86 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
bpf_program__attach(struct bpf_program *prog);
+
+struct bpf_perf_event_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+};
+#define bpf_perf_event_opts__last_field bpf_cookie
+
LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts);
+
+struct bpf_kprobe_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* function's offset to install kprobe to */
+ unsigned long offset;
+ /* kprobe is return probe */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_kprobe_opts__last_field retprobe
+
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
const char *func_name);
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe_opts(struct bpf_program *prog,
const char *func_name,
- struct bpf_kprobe_opts *opts);
+ const struct bpf_kprobe_opts *opts);
+
+struct bpf_uprobe_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* offset of kernel reference counted USDT semaphore, added in
+ * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+ */
+ size_t ref_ctr_offset;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* uprobe is return probe, invoked at function return time */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_uprobe_opts__last_field retprobe
+
LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
pid_t pid, const char *binary_path,
size_t func_offset);
LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts);
+
+struct bpf_tracepoint_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+};
+#define bpf_tracepoint_opts__last_field bpf_cookie
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_tracepoint(struct bpf_program *prog,
const char *tp_category,
const char *tp_name);
LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts);
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name);
LIBBPF_API struct bpf_link *
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 58e0fb2c482f..bbc53bb25f68 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -374,6 +374,9 @@ LIBBPF_0.5.0 {
bpf_map__pin_path;
bpf_map_lookup_and_delete_elem_flags;
bpf_program__attach_kprobe_opts;
+ bpf_program__attach_perf_event_opts;
+ bpf_program__attach_tracepoint_opts;
+ bpf_program__attach_uprobe_opts;
bpf_object__gen_loader;
btf__load_from_kernel_by_id;
btf__load_from_kernel_by_id_split;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index f7b691d5f9eb..533b0211f40a 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -196,6 +196,17 @@ void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
size_t cur_cnt, size_t max_cnt, size_t add_cnt);
int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+static inline bool libbpf_is_mem_zeroed(const char *p, ssize_t len)
+{
+ while (len > 0) {
+ if (*p)
+ return false;
+ p++;
+ len--;
+ }
+ return true;
+}
+
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
const char *type_name)
@@ -204,16 +215,9 @@ static inline bool libbpf_validate_opts(const char *opts,
pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
return false;
}
- if (user_sz > opts_sz) {
- size_t i;
-
- for (i = opts_sz; i < user_sz; i++) {
- if (opts[i]) {
- pr_warn("%s has non-zero extra bytes\n",
- type_name);
- return false;
- }
- }
+ if (!libbpf_is_mem_zeroed(opts + opts_sz, (ssize_t)user_sz - opts_sz)) {
+ pr_warn("%s has non-zero extra bytes\n", type_name);
+ return false;
}
return true;
}
@@ -233,6 +237,14 @@ static inline bool libbpf_validate_opts(const char *opts,
(opts)->field = value; \
} while (0)
+#define OPTS_ZEROED(opts, last_nonzero_field) \
+({ \
+ ssize_t __off = offsetofend(typeof(*(opts)), last_nonzero_field); \
+ !(opts) || libbpf_is_mem_zeroed((const void *)opts + __off, \
+ (opts)->sz - __off); \
+})
+
+
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2a58b7b5aea4..866531c08e4f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -79,7 +79,7 @@ TEST_PROGS := test_kmod.sh \
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
- test_xdp_vlan.sh
+ test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
@@ -187,6 +187,8 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
cp $(SCRATCH_DIR)/runqslower $@
+TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 029589c008c9..b1ede6f0b821 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -12,6 +12,10 @@
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
struct sock_common {
@@ -27,6 +31,7 @@ enum sk_pacing {
struct sock {
struct sock_common __sk_common;
+#define sk_state __sk_common.skc_state
unsigned long sk_pacing_rate;
__u32 sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
@@ -203,6 +208,20 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
+static __always_inline bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index d6857683397f..7e9f6375757a 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -218,13 +218,18 @@ static int connect_fd_to_addr(int fd,
return 0;
}
-int connect_to_fd(int server_fd, int timeout_ms)
+static const struct network_helper_opts default_opts;
+
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
struct sockaddr_in *addr_in;
socklen_t addrlen, optlen;
int fd, type;
+ if (!opts)
+ opts = &default_opts;
+
optlen = sizeof(type);
if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
log_err("getsockopt(SOL_TYPE)");
@@ -244,7 +249,12 @@ int connect_to_fd(int server_fd, int timeout_ms)
return -1;
}
- if (settimeo(fd, timeout_ms))
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->cc && opts->cc[0] &&
+ setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc,
+ strlen(opts->cc) + 1))
goto error_close;
if (connect_fd_to_addr(fd, &addr, addrlen))
@@ -257,6 +267,15 @@ error_close:
return -1;
}
+int connect_to_fd(int server_fd, int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
+
+ return connect_to_fd_opts(server_fd, &opts);
+}
+
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
struct sockaddr_storage addr;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index c59a8f6d770b..da7e132657d5 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -17,6 +17,11 @@ typedef __u16 __sum16;
#define VIP_NUM 5
#define MAGIC_BYTES 123
+struct network_helper_opts {
+ const char *cc;
+ int timeout_ms;
+};
+
/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
@@ -41,6 +46,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
unsigned int nr_listens);
void free_fds(int *fds, unsigned int nr_close_fds);
int connect_to_fd(int server_fd, int timeout_ms);
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index ec11e20d2b92..bf307bb9e446 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,79 +2,28 @@
#include <test_progs.h>
#include "test_attach_probe.skel.h"
-#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
-
-#define OP_RT_RA_MASK 0xffff0000UL
-#define LIS_R2 0x3c400000UL
-#define ADDIS_R2_R12 0x3c4c0000UL
-#define ADDI_R2_R2 0x38420000UL
-
-static ssize_t get_offset(ssize_t addr, ssize_t base)
-{
- u32 *insn = (u32 *) addr;
-
- /*
- * A PPC64 ABIv2 function may have a local and a global entry
- * point. We need to use the local entry point when patching
- * functions, so identify and step over the global entry point
- * sequence.
- *
- * The global entry point sequence is always of the form:
- *
- * addis r2,r12,XXXX
- * addi r2,r2,XXXX
- *
- * A linker optimisation may convert the addis to lis:
- *
- * lis r2,XXXX
- * addi r2,r2,XXXX
- */
- if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
- ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
- ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
- return (ssize_t)(insn + 2) - base;
- else
- return addr - base;
-}
-#else
-#define get_offset(addr, base) (addr - base)
-#endif
-
-ssize_t get_base_addr() {
- size_t start, offset;
- char buf[256];
- FILE *f;
-
- f = fopen("/proc/self/maps", "r");
- if (!f)
- return -errno;
-
- while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
- &start, buf, &offset) == 3) {
- if (strcmp(buf, "r-xp") == 0) {
- fclose(f);
- return start - offset;
- }
- }
-
- fclose(f);
- return -EINVAL;
-}
+/* this is how USDT semaphore is actually defined, except volatile modifier */
+volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
void test_attach_probe(void)
{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
int duration = 0;
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
size_t uprobe_offset;
- ssize_t base_addr;
+ ssize_t base_addr, ref_ctr_offset;
base_addr = get_base_addr();
if (CHECK(base_addr < 0, "get_base_addr",
"failed to find base addr: %zd", base_addr))
return;
- uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -96,20 +45,28 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
- false /* retprobe */,
- 0 /* self pid */,
- "/proc/self/exe",
- uprobe_offset);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
- true /* retprobe */,
- -1 /* any pid */,
- "/proc/self/exe",
- uprobe_offset);
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
@@ -136,4 +93,5 @@ void test_attach_probe(void)
cleanup:
test_attach_probe__destroy(skel);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
new file mode 100644
index 000000000000..5eea3c3a40fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_bpf_cookie.skel.h"
+
+static void kprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+
+ /* attach two kprobes */
+ opts.bpf_cookie = 0x1;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two kretprobes */
+ opts.bpf_cookie = 0x10;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x20;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger kprobe && kretprobe */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->kprobe_res, 0x1 | 0x2, "kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 0x10 | 0x20, "kretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void uprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+ size_t uprobe_offset;
+ ssize_t base_addr;
+
+ base_addr = get_base_addr();
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ /* attach two uprobes */
+ opts.bpf_cookie = 0x100;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x200;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two uretprobes */
+ opts.bpf_cookie = 0x1000;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2000;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger uprobe && uretprobe */
+ get_base_addr();
+
+ ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void tp_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL, *link3 = NULL;
+
+ /* attach first tp prog */
+ opts.bpf_cookie = 0x10000;
+ link1 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp1,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ /* attach second tp prog */
+ opts.bpf_cookie = 0x20000;
+ link2 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp2,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x10000 | 0x20000, "tp_res1");
+
+ /* now we detach first prog and will attach third one, which causes
+ * two internal calls to bpf_prog_array_copy(), shuffling
+ * bpf_prog_array_items around. We test here that we don't lose track
+ * of associated bpf_cookies.
+ */
+ bpf_link__destroy(link1);
+ link1 = NULL;
+ kern_sync_rcu();
+ skel->bss->tp_res = 0;
+
+ /* attach third tp prog */
+ opts.bpf_cookie = 0x40000;
+ link3 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp3,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link3, "link3"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x20000 | 0x40000, "tp_res2");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link3);
+}
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+static void pe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts);
+ struct bpf_link *link = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1;
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x100000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link1"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
+
+ /* prevent bpf_link__destroy() closing pfd itself */
+ bpf_link__disconnect(link);
+ /* close BPF link's FD explicitly */
+ close(bpf_link__fd(link));
+ /* free up memory used by struct bpf_link */
+ bpf_link__destroy(link);
+ link = NULL;
+ kern_sync_rcu();
+ skel->bss->pe_res = 0;
+
+ opts.bpf_cookie = 0x200000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link2"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
+
+cleanup:
+ close(pfd);
+ bpf_link__destroy(link);
+}
+
+void test_bpf_cookie(void)
+{
+ struct test_bpf_cookie *skel;
+
+ skel = test_bpf_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_tid = syscall(SYS_gettid);
+
+ if (test__start_subtest("kprobe"))
+ kprobe_subtest(skel);
+ if (test__start_subtest("uprobe"))
+ uprobe_subtest(skel);
+ if (test__start_subtest("tracepoint"))
+ tp_subtest(skel);
+ if (test__start_subtest("perf_event"))
+ pe_subtest(skel);
+
+ test_bpf_cookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f1aade56504..77ac24b191d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@
#include "bpf_iter_tcp6.skel.h"
#include "bpf_iter_udp4.skel.h"
#include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,19 @@ static void test_udp6(void)
bpf_iter_udp6__destroy(skel);
}
+static void test_unix(void)
+{
+ struct bpf_iter_unix *skel;
+
+ skel = bpf_iter_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_unix);
+
+ bpf_iter_unix__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
@@ -1255,6 +1269,8 @@ void test_bpf_iter(void)
test_udp4();
if (test__start_subtest("udp6"))
test_udp6();
+ if (test__start_subtest("unix"))
+ test_unix();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index efe1e979affb..94e03df69d71 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -4,37 +4,22 @@
#include <linux/err.h>
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
+#include "bpf_dctcp_release.skel.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
static const unsigned int total_bytes = 10 * 1024 * 1024;
-static const struct timeval timeo_sec = { .tv_sec = 10 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int expected_stg = 0xeB9F;
static int stop, duration;
-static int settimeo(int fd)
-{
- int err;
-
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- return 0;
-}
-
static int settcpca(int fd, const char *tcp_ca)
{
int err;
@@ -61,7 +46,7 @@ static void *server(void *arg)
goto done;
}
- if (settimeo(fd)) {
+ if (settimeo(fd, 0)) {
err = -errno;
goto done;
}
@@ -114,7 +99,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
}
if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd) || settimeo(fd))
+ settimeo(lfd, 0) || settimeo(fd, 0))
goto done;
/* bind, listen and start server thread to accept */
@@ -267,6 +252,77 @@ static void test_invalid_license(void)
libbpf_set_print(old_print_fn);
}
+static void test_dctcp_fallback(void)
+{
+ int err, lfd = -1, cli_fd = -1, srv_fd = -1;
+ struct network_helper_opts opts = {
+ .cc = "cubic",
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link = NULL;
+ char srv_cc[16];
+ socklen_t cc_len = sizeof(srv_cc);
+
+ dctcp_skel = bpf_dctcp__open();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ return;
+ strcpy(dctcp_skel->rodata->fallback, "cubic");
+ if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
+ goto done;
+
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(link, "dctcp link"))
+ goto done;
+
+ lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(lfd, 0, "lfd") ||
+ !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp"))
+ goto done;
+
+ cli_fd = connect_to_fd_opts(lfd, &opts);
+ if (!ASSERT_GE(cli_fd, 0, "cli_fd"))
+ goto done;
+
+ srv_fd = accept(lfd, NULL, 0);
+ if (!ASSERT_GE(srv_fd, 0, "srv_fd"))
+ goto done;
+ ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
+ ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+
+ err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
+ if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
+ goto done;
+ ASSERT_STREQ(srv_cc, "cubic", "srv_fd cc");
+
+done:
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+static void test_rel_setsockopt(void)
+{
+ struct bpf_dctcp_release *rel_skel;
+ libbpf_print_fn_t old_print_fn;
+
+ err_str = "unknown func bpf_setsockopt";
+ found = false;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+ rel_skel = bpf_dctcp_release__open_and_load();
+ libbpf_set_print(old_print_fn);
+
+ ASSERT_ERR_PTR(rel_skel, "rel_skel");
+ ASSERT_TRUE(found, "expected_err_msg");
+
+ bpf_dctcp_release__destroy(rel_skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -275,4 +331,8 @@ void test_bpf_tcp_ca(void)
test_cubic();
if (test__start_subtest("invalid_license"))
test_invalid_license();
+ if (test__start_subtest("dctcp_fallback"))
+ test_dctcp_fallback();
+ if (test__start_subtest("rel_setsockopt"))
+ test_rel_setsockopt();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_module.c b/tools/testing/selftests/bpf/prog_tests/btf_module.c
new file mode 100644
index 000000000000..2239d1fe0332
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_module.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_testmod_test_read";
+
+void test_btf_module()
+{
+ struct btf *vmlinux_btf, *module_btf;
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+ if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+ goto cleanup;
+
+ type_id = btf__find_by_name(module_btf, symbol_name);
+ ASSERT_GT(type_id, 0, "func not found");
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 30a7b9b837bf..9611f2bc50df 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -44,7 +44,7 @@ static void test_subprog(void)
ASSERT_OK(err, "bpf_prog_test_run(test1)");
ASSERT_EQ(retval, 10, "test1-retval");
ASSERT_NEQ(skel->data->active_res, -1, "active_res");
- ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
kfunc_call_test_subprog__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index 67bebd324147..cf3acfa5a91d 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -6,6 +6,7 @@
#include <bpf/btf.h>
#include "test_ksyms_btf.skel.h"
#include "test_ksyms_btf_null_check.skel.h"
+#include "test_ksyms_weak.skel.h"
static int duration;
@@ -81,6 +82,33 @@ static void test_null_check(void)
test_ksyms_btf_null_check__destroy(skel);
}
+static void test_weak_syms(void)
+{
+ struct test_ksyms_weak *skel;
+ struct test_ksyms_weak__data *data;
+ int err;
+
+ skel = test_ksyms_weak__open_and_load();
+ if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n"))
+ return;
+
+ err = test_ksyms_weak__attach(skel);
+ if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak__destroy(skel);
+}
+
void test_ksyms_btf(void)
{
int percpu_datasec;
@@ -105,4 +133,7 @@ void test_ksyms_btf(void)
if (test__start_subtest("null_check"))
test_null_check();
+
+ if (test__start_subtest("weak_ksyms"))
+ test_weak_syms();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
new file mode 100644
index 000000000000..71d8f3ba7d6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "netns_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+static int duration;
+
+void test_netns_cookie(void)
+{
+ int server_fd = -1, client_fd = -1, cgroup_fd = -1;
+ int err, val, ret, map, verdict;
+ struct netns_cookie_prog *skel;
+ uint64_t cookie_expected_value;
+ socklen_t vallen = sizeof(cookie_expected_value);
+ static const char send_msg[] = "message";
+
+ skel = netns_cookie_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/netns_cookie");
+ if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+ goto done;
+
+ skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ goto done;
+
+ verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto done;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+ goto done;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+ goto done;
+
+ ret = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret))
+ goto done;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)"))
+ goto done;
+
+ err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE,
+ &cookie_expected_value, &vallen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+done:
+ if (server_fd != -1)
+ close(server_fd);
+ if (client_fd != -1)
+ close(client_fd);
+ if (cgroup_fd != -1)
+ close(cgroup_fd);
+ netns_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
new file mode 100644
index 000000000000..b1abd0c46607
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "test_perf_link.skel.h"
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+void test_perf_link(void)
+{
+ struct test_perf_link *skel = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1, link_fd = -1, err;
+ int run_cnt_before, run_cnt_after;
+ struct bpf_link_info info;
+ __u32 info_len = sizeof(info);
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ skel = test_perf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handler), pfd,
+ BPF_PERF_EVENT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto cleanup;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "link_get_info"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type");
+ ASSERT_GT(info.id, 0, "link_id");
+ ASSERT_GT(info.prog_id, 0, "link_prog_id");
+
+ /* ensure we get at least one perf_event prog execution */
+ burn_cpu();
+ ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt");
+
+ /* perf_event is still active, but we close link and BPF program
+ * shouldn't be executed anymore
+ */
+ close(link_fd);
+ link_fd = -1;
+
+ /* make sure there are no stragglers */
+ kern_sync_rcu();
+
+ run_cnt_before = skel->bss->run_cnt;
+ burn_cpu();
+ run_cnt_after = skel->bss->run_cnt;
+
+ ASSERT_EQ(run_cnt_before, run_cnt_after, "run_cnt_before_after");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ if (pfd >= 0)
+ close(pfd);
+ test_perf_link__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 023cc532992d..776916b61c40 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include "test_send_signal_kern.skel.h"
int sigusr1_received = 0;
@@ -10,29 +12,25 @@ static void sigusr1_handler(int signum)
}
static void test_send_signal_common(struct perf_event_attr *attr,
- bool signal_thread,
- const char *test_name)
+ bool signal_thread)
{
struct test_send_signal_kern *skel;
int pipe_c2p[2], pipe_p2c[2];
int err = -1, pmu_fd = -1;
- __u32 duration = 0;
char buf[256];
pid_t pid;
- if (CHECK(pipe(pipe_c2p), test_name,
- "pipe pipe_c2p error: %s\n", strerror(errno)))
+ if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
return;
- if (CHECK(pipe(pipe_p2c), test_name,
- "pipe pipe_p2c error: %s\n", strerror(errno))) {
+ if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
return;
}
pid = fork();
- if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+ if (!ASSERT_GE(pid, 0, "fork")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -41,26 +39,40 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
if (pid == 0) {
+ int old_prio;
+
/* install signal handler and notify parent */
signal(SIGUSR1, sigusr1_handler);
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
+ /* boost with a high priority so we got a higher chance
+ * that if an interrupt happens, the underlying task
+ * is this process.
+ */
+ errno = 0;
+ old_prio = getpriority(PRIO_PROCESS, 0);
+ ASSERT_OK(errno, "getpriority");
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+
/* notify parent signal handler is installed */
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* make sure parent enabled bpf program to send_signal */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
sleep(1);
buf[0] = sigusr1_received ? '2' : '0';
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* wait for parent notification and exit */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
+
+ /* restore the old priority */
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -71,20 +83,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[0]); /* close read */
skel = test_send_signal_kern__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
goto skel_open_load_failure;
if (!attr) {
err = test_send_signal_kern__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "skel_attach")) {
err = -1;
goto destroy_skel;
}
} else {
pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
-1 /* group id */, 0 /* flags */);
- if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
- strerror(errno))) {
+ if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
err = -1;
goto destroy_skel;
}
@@ -96,7 +107,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
/* wait until child signal handler installed */
- CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
/* trigger the bpf send_signal */
skel->bss->pid = pid;
@@ -104,21 +115,21 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
- if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "reading pipe"))
goto disable_pmu;
- if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+ if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
err = -1;
goto disable_pmu;
}
- CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ ASSERT_EQ(buf[0], '2', "incorrect result");
/* notify child safe to exit */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
disable_pmu:
close(pmu_fd);
@@ -132,7 +143,7 @@ skel_open_load_failure:
static void test_send_signal_tracepoint(bool signal_thread)
{
- test_send_signal_common(NULL, signal_thread, "tracepoint");
+ test_send_signal_common(NULL, signal_thread);
}
static void test_send_signal_perf(bool signal_thread)
@@ -143,7 +154,7 @@ static void test_send_signal_perf(bool signal_thread)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- test_send_signal_common(&attr, signal_thread, "perf_sw_event");
+ test_send_signal_common(&attr, signal_thread);
}
static void test_send_signal_nmi(bool signal_thread)
@@ -172,7 +183,7 @@ static void test_send_signal_nmi(bool signal_thread)
close(pmu_fd);
}
- test_send_signal_common(&attr, signal_thread, "perf_hw_event");
+ test_send_signal_common(&attr, signal_thread);
}
void test_send_signal(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index dffbcaa1ec98..8fd1b4b29a0e 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -19,7 +19,7 @@
#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
-#define EXP_STR_OUT "str1 longstr"
+#define EXP_STR_OUT "str1 a b c d e longstr"
#define EXP_STR_RET sizeof(EXP_STR_OUT)
#define EXP_OVER_OUT "%over"
@@ -114,6 +114,8 @@ void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
+ ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index a9f1bf9d5dff..5c5979046523 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -949,6 +949,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
int err, n;
u32 key;
char b;
+ int retries = 100;
zero_verdict_count(verd_mapfd);
@@ -1001,10 +1002,15 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
goto close_peer1;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
+again:
n = read(c0, &b, 1);
- if (n < 0)
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
+ }
if (n == 0)
FAIL("%s: incomplete read", log_prefix);
@@ -1603,8 +1609,10 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd,
again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
if (n < 0) {
- if (errno == EAGAIN && retries--)
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
}
if (n == 0)
@@ -1692,14 +1700,14 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static int udp_socketpair(int family, int *s, int *c)
+static int inet_socketpair(int family, int type, int *s, int *c)
{
struct sockaddr_storage addr;
socklen_t len;
int p0, c0;
int err;
- p0 = socket_loopback(family, SOCK_DGRAM | SOCK_NONBLOCK);
+ p0 = socket_loopback(family, type | SOCK_NONBLOCK);
if (p0 < 0)
return p0;
@@ -1708,7 +1716,7 @@ static int udp_socketpair(int family, int *s, int *c)
if (err)
goto close_peer0;
- c0 = xsocket(family, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
if (c0 < 0) {
err = c0;
goto close_peer0;
@@ -1747,10 +1755,10 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
zero_verdict_count(verd_mapfd);
- err = udp_socketpair(family, &p0, &c0);
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
if (err)
return;
- err = udp_socketpair(family, &p1, &c1);
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
if (err)
goto close_cli0;
@@ -1776,8 +1784,10 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
if (n < 0) {
- if (errno == EAGAIN && retries--)
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
}
if (n == 0)
@@ -1825,7 +1835,7 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
udp_skb_redir_to_connected(skel, map, family);
}
-static void udp_unix_redir_to_connected(int family, int sock_mapfd,
+static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
@@ -1843,7 +1853,7 @@ static void udp_unix_redir_to_connected(int family, int sock_mapfd,
return;
c0 = sfd[0], p0 = sfd[1];
- err = udp_socketpair(family, &p1, &c1);
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
if (err)
goto close;
@@ -1869,8 +1879,10 @@ static void udp_unix_redir_to_connected(int family, int sock_mapfd,
again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
if (n < 0) {
- if (errno == EAGAIN && retries--)
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
}
if (n == 0)
@@ -1884,7 +1896,7 @@ close:
xclose(p0);
}
-static void udp_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family)
{
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
@@ -1897,14 +1909,20 @@ static void udp_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
return;
skel->bss->test_ingress = false;
- udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
skel->bss->test_ingress = true;
- udp_unix_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
-static void unix_udp_redir_to_connected(int family, int sock_mapfd,
+static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
@@ -1914,10 +1932,11 @@ static void unix_udp_redir_to_connected(int family, int sock_mapfd,
int sfd[2];
u32 key;
char b;
+ int retries = 100;
zero_verdict_count(verd_mapfd);
- err = udp_socketpair(family, &p0, &c0);
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
if (err)
return;
@@ -1944,9 +1963,15 @@ static void unix_udp_redir_to_connected(int family, int sock_mapfd,
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
- if (n < 0)
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
+ }
if (n == 0)
FAIL("%s: incomplete read", log_prefix);
@@ -1959,7 +1984,7 @@ close_cli0:
}
-static void unix_udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family)
{
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
@@ -1972,9 +1997,15 @@ static void unix_udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
return;
skel->bss->test_ingress = false;
- unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
skel->bss->test_ingress = true;
- unix_udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
@@ -1990,8 +2021,8 @@ static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
if (!test__start_subtest(s))
return;
- udp_unix_skb_redir_to_connected(skel, map, family);
- unix_udp_skb_redir_to_connected(skel, map, family);
+ inet_unix_skb_redir_to_connected(skel, map, family);
+ unix_inet_skb_redir_to_connected(skel, map, family);
}
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
@@ -2020,11 +2051,13 @@ void test_sockmap_listen(void)
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index ec281b0363b8..86f97681ad89 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -195,8 +195,10 @@ static void run_test(int cgroup_fd)
pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
+ (void *)&server_fd))) {
+ pthread_mutex_unlock(&server_started_mtx);
goto close_server_fd;
+ }
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..6b53b3cb8dad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <netinet/tcp.h>
+#include "sockopt_qos_to_cc.skel.h"
+
+static void run_setsockopt_test(int cg_fd, int sock_fd)
+{
+ socklen_t optlen;
+ char cc[16]; /* TCP_CA_NAME_MAX */
+ int buf;
+ int err = -1;
+
+ buf = 0x2D;
+ err = setsockopt(sock_fd, SOL_IPV6, IPV6_TCLASS, &buf, sizeof(buf));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, IPV6_TCLASS)"))
+ return;
+
+ /* Verify the setsockopt cc change */
+ optlen = sizeof(cc);
+ err = getsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+
+ if (!ASSERT_STREQ(cc, "reno", "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+}
+
+void test_sockopt_qos_to_cc(void)
+{
+ struct sockopt_qos_to_cc *skel;
+ char cc_cubic[16] = "cubic"; /* TCP_CA_NAME_MAX */
+ int cg_fd = -1;
+ int sock_fd = -1;
+ int err;
+
+ cg_fd = test__join_cgroup("/sockopt_qos_to_cc");
+ if (!ASSERT_GE(cg_fd, 0, "cg-join(sockopt_qos_to_cc)"))
+ return;
+
+ skel = sockopt_qos_to_cc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto done;
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
+ goto done;
+
+ err = setsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, &cc_cubic,
+ sizeof(cc_cubic));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, TCP_CONGESTION)"))
+ goto done;
+
+ skel->links.sockopt_qos_to_cc =
+ bpf_program__attach_cgroup(skel->progs.sockopt_qos_to_cc,
+ cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.sockopt_qos_to_cc,
+ "prog_attach(sockopt_qos_to_cc)"))
+ goto done;
+
+ run_setsockopt_test(cg_fd, sock_fd);
+
+done:
+ if (sock_fd != -1)
+ close(sock_fd);
+ if (cg_fd != -1)
+ close(cg_fd);
+ /* destroy can take null and error pointer */
+ sockopt_qos_to_cc__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
new file mode 100644
index 000000000000..53f0e0fa1a53
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <linux/ptrace.h>
+#include "test_task_pt_regs.skel.h"
+
+void test_task_pt_regs(void)
+{
+ struct test_task_pt_regs *skel;
+ struct bpf_link *uprobe_link;
+ size_t uprobe_offset;
+ ssize_t base_addr;
+ bool match;
+
+ base_addr = get_base_addr();
+ if (!ASSERT_GT(base_addr, 0, "get_base_addr"))
+ return;
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ skel = test_task_pt_regs__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
+ false /* retprobe */,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ /* trigger & validate uprobe */
+ get_base_addr();
+
+ if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
+ goto cleanup;
+
+ match = !memcmp(&skel->bss->current_regs, &skel->bss->ctx_regs,
+ sizeof(skel->bss->current_regs));
+ ASSERT_TRUE(match, "check_regs_match");
+
+cleanup:
+ test_task_pt_regs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index f5acbcbe33a4..ced8f6cf347c 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -23,8 +23,12 @@ static int timer_mim(struct timer_mim *timer_skel)
/* check that timer_cb[12] are incrementing 'cnt' */
cnt1 = READ_ONCE(timer_skel->bss->cnt);
- usleep(200); /* 100 times more than interval */
- cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 != cnt1)
+ break;
+ usleep(200); /* 100 times more than interval */
+ }
ASSERT_GT(cnt2, cnt1, "cnt");
ASSERT_EQ(timer_skel->bss->err, 0, "err");
@@ -37,8 +41,12 @@ static int timer_mim(struct timer_mim *timer_skel)
/* check that timer_cb[12] are no longer running */
cnt1 = READ_ONCE(timer_skel->bss->cnt);
- usleep(200);
- cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ usleep(200); /* 100 times more than interval */
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 == cnt1)
+ break;
+ }
ASSERT_EQ(cnt2, cnt1, "cnt");
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index 6b186b4238d0..370d220288a6 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -493,20 +493,20 @@ void test_xdp_bonding(void)
"xdp_redirect_multi_kern__open_and_load"))
goto out;
- if (!test__start_subtest("xdp_bonding_attach"))
+ if (test__start_subtest("xdp_bonding_attach"))
test_xdp_bonding_attach(&skeletons);
for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
struct bond_test_case *test_case = &bond_test_cases[i];
- if (!test__start_subtest(test_case->name))
+ if (test__start_subtest(test_case->name))
test_xdp_bonding_with_mode(
&skeletons,
test_case->mode,
test_case->xmit_policy);
}
- if (!test__start_subtest("xdp_bonding_redirect_multi"))
+ if (test__start_subtest("xdp_bonding_redirect_multi"))
test_xdp_bonding_redirect_multi(&skeletons);
out:
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index fd42247da8b4..9573be6122be 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -17,6 +17,11 @@
char _license[] SEC("license") = "GPL";
+volatile const char fallback[TCP_CA_NAME_MAX];
+const char bpf_dctcp[] = "bpf_dctcp";
+const char tcp_cdg[] = "cdg";
+char cc_res[TCP_CA_NAME_MAX];
+int tcp_cdg_res = 0;
int stg_result = 0;
struct {
@@ -57,6 +62,26 @@ void BPF_PROG(dctcp_init, struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
int *stg;
+ if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
+ /* Switch to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Switch back to myself which the bpf trampoline
+ * stopped calling dctcp_init recursively.
+ */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)bpf_dctcp, sizeof(bpf_dctcp));
+ /* Switch back to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Expecting -ENOTSUPP for tcp_cdg_res */
+ tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)tcp_cdg, sizeof(tcp_cdg));
+ bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cc_res, sizeof(cc_res));
+ return;
+ }
+
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
new file mode 100644
index 000000000000..d836f7c372f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+const char cubic[] = "cubic";
+
+void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+{
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cubic, sizeof(cubic));
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_rel = {
+ .release = (void *)dctcp_nouse_release,
+ .name = "bpf_dctcp_rel",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 3d83b185c4bc..8cfaeba1ddbf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -12,6 +12,7 @@
#define tcp6_sock tcp6_sock___not_used
#define bpf_iter__udp bpf_iter__udp___not_used
#define udp6_sock udp6_sock___not_used
+#define bpf_iter__unix bpf_iter__unix___not_used
#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
#define bpf_iter__sockmap bpf_iter__sockmap___not_used
@@ -32,6 +33,7 @@
#undef tcp6_sock
#undef bpf_iter__udp
#undef udp6_sock
+#undef bpf_iter__unix
#undef bpf_iter__bpf_map_elem
#undef bpf_iter__bpf_sk_storage_map
#undef bpf_iter__sockmap
@@ -103,6 +105,12 @@ struct udp6_sock {
struct ipv6_pinfo inet6;
} __attribute__((preserve_access_index));
+struct bpf_iter__unix {
+ struct bpf_iter_meta *meta;
+ struct unix_sock *unix_sk;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
struct bpf_iter__bpf_map_elem {
struct bpf_iter_meta *meta;
struct bpf_map *map;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..94423902685d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ struct sock *sk = (struct sock *)unix_sk;
+ struct seq_file *seq;
+ __u32 seq_num;
+
+ if (!unix_sk)
+ return 0;
+
+ seq = ctx->meta->seq;
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Num RefCount Protocol Flags Type St Inode Path\n");
+
+ BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %8lu",
+ unix_sk,
+ sk->sk_refcnt.refs.counter,
+ 0,
+ sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+ sk->sk_type,
+ sk->sk_socket ?
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+ sock_i_ino(sk));
+
+ if (unix_sk->addr) {
+ if (!UNIX_ABSTRACT(unix_sk)) {
+ BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+ } else {
+ /* The name of the abstract UNIX domain socket starts
+ * with '\0' and can contain '\0'. The null bytes
+ * should be escaped as done in unix_seq_show().
+ */
+ __u64 i, len;
+
+ len = unix_sk->addr->len - sizeof(short);
+
+ BPF_SEQ_PRINTF(seq, " @");
+
+ for (i = 1; i < len; i++) {
+ /* unix_mkname() tests this upper bound. */
+ if (i >= sizeof(struct sockaddr_un))
+ break;
+
+ BPF_SEQ_PRINTF(seq, "%c",
+ unix_sk->addr->name->sun_path[i] ?:
+ '@');
+ }
+ }
+ }
+
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 3af0998a0623..eef5646ddb19 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -5,6 +5,10 @@
#define AF_INET 2
#define AF_INET6 10
+#define __SO_ACCEPTCON (1 << 16)
+#define UNIX_HASH_SIZE 256
+#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
+
#define SOL_TCP 6
#define TCP_CONGESTION 13
#define TCP_CA_NAME_MAX 16
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index b2dcb7d9cb03..5fbd9e232d44 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -9,7 +9,7 @@ extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
__u32 c, __u64 d) __ksym;
extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
int active_res = -1;
-int sk_state = -1;
+int sk_state_res = -1;
int __noinline f1(struct __sk_buff *skb)
{
@@ -28,7 +28,7 @@ int __noinline f1(struct __sk_buff *skb)
if (active)
active_res = *active;
- sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+ sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state;
return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
}
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
new file mode 100644
index 000000000000..aeff3a4f9287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+#define AF_INET6 10
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sockops_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_msg_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *cookie;
+ __u32 key = 0;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ cookie = bpf_sk_storage_get(&sockops_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(ctx);
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ bpf_sock_map_update(ctx, &sock_map, &key, BPF_NOEXIST);
+ break;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+SEC("sk_msg")
+int get_netns_cookie_sk_msg(struct sk_msg_md *msg)
+{
+ struct bpf_sock *sk = msg->sk;
+ int *cookie;
+
+ if (msg->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ cookie = bpf_sk_storage_get(&sk_msg_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(msg);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..1bce83b6e3a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <string.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/setsockopt")
+int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
+{
+ void *optval_end = ctx->optval_end;
+ int *optval = ctx->optval;
+ char buf[TCP_CA_NAME_MAX];
+ char cc_reno[TCP_CA_NAME_MAX] = "reno";
+ char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
+ if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
+ return 1;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 0;
+
+ if (!tcp_cc_eq(buf, cc_cubic))
+ return 0;
+
+ if (*optval == 0x2d) {
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+ sizeof(cc_reno)))
+ return 0;
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 8acdb99b5959..79c8139b63b8 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -33,6 +33,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
@@ -123,6 +131,14 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
new file mode 100644
index 000000000000..2d3a7710e2ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int my_tid;
+
+int kprobe_res;
+int kprobe_multi_res;
+int kretprobe_res;
+int uprobe_res;
+int uretprobe_res;
+int tp_res;
+int pe_res;
+
+static void update(void *ctx, int *res)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return;
+
+ *res |= bpf_get_attach_cookie(ctx);
+}
+
+SEC("kprobe/sys_nanosleep")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kprobe_res);
+ return 0;
+}
+
+SEC("kretprobe/sys_nanosleep")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kretprobe_res);
+ return 0;
+}
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uprobe_res);
+ return 0;
+}
+
+SEC("uretprobe/trigger_func")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uretprobe_res);
+ return 0;
+}
+
+/* bpf_prog_array, used by kernel internally to keep track of attached BPF
+ * programs to a given BPF hook (e.g., for tracepoints) doesn't allow the same
+ * BPF program to be attached multiple times. So have three identical copies
+ * ready to attach to the same tracepoint.
+ */
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp1(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp2(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp3(void *ctx)
+{
+ update(ctx, &tp_res);
+ return 1;
+}
+
+SEC("perf_event")
+int handle_pe(struct pt_regs *ctx)
+{
+ update(ctx, &pe_res);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
index 44f5aa2e8956..9a7829c5e4a7 100644
--- a/tools/testing/selftests/bpf/progs/test_core_autosize.c
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -125,6 +125,16 @@ int handle_downsize(void *ctx)
return 0;
}
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_core_read_int bpf_core_read
+#else
+#define bpf_core_read_int(dst, sz, src) ({ \
+ /* Prevent "subtraction from stack pointer prohibited" */ \
+ volatile long __off = sizeof(*dst) - (sz); \
+ bpf_core_read((char *)(dst) + __off, sz, src); \
+})
+#endif
+
SEC("raw_tp/sys_enter")
int handle_probed(void *ctx)
{
@@ -132,23 +142,23 @@ int handle_probed(void *ctx)
__u64 tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
ptr_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val1), &in->val1);
val1_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val2), &in->val2);
val2_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val3), &in->val3);
val3_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val4), &in->val4);
val4_probed = tmp;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
new file mode 100644
index 000000000000..5f8379aadb29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test weak ksyms.
+ *
+ * Copyright (c) 2021 Google
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+int out__existing_typed = -1;
+__u64 out__existing_typeless = -1;
+
+__u64 out__non_existent_typeless = -1;
+__u64 out__non_existent_typed = -1;
+
+/* existing weak symbols */
+
+/* test existing weak symbols can be resolved. */
+extern const struct rq runqueues __ksym __weak; /* typed */
+extern const void bpf_prog_active __ksym __weak; /* typeless */
+
+
+/* non-existent weak symbols. */
+
+/* typeless symbols, default to zero. */
+extern const void bpf_link_fops1 __ksym __weak;
+
+/* typed symbols, default to zero. */
+extern const int bpf_link_fops2 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int pass_handler(const void *ctx)
+{
+ struct rq *rq;
+
+ /* tests existing symbols. */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq)
+ out__existing_typed = rq->cpu;
+ out__existing_typeless = (__u64)&bpf_prog_active;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typeless = (__u64)&bpf_link_fops1;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typed = (__u64)&bpf_link_fops2;
+
+ if (&bpf_link_fops2) /* can't happen */
+ out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_link.c b/tools/testing/selftests/bpf/progs/test_perf_link.c
new file mode 100644
index 000000000000..c1db9fd98d0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_link.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int run_cnt = 0;
+
+SEC("perf_event")
+int handler(struct pt_regs *ctx)
+{
+ __sync_fetch_and_add(&run_cnt, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index e2ad26150f9b..8fda07544023 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -59,9 +59,9 @@ int handler(const void *ctx)
/* Kernel pointers */
addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
- /* Strings embedding */
- str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
- str1, longstr);
+ /* Strings and single-byte character embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s % 9c %+2c %-3c %04c %0c %+05s",
+ str1, 'a', 'b', 'c', 'd', 'e', longstr);
/* Overflow */
over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
/* Padding of fixed width numbers */
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
new file mode 100644
index 000000000000..6c059f1cfa1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct pt_regs current_regs = {};
+struct pt_regs ctx_regs = {};
+int uprobe_res = 0;
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ struct task_struct *current;
+ struct pt_regs *regs;
+
+ current = bpf_get_current_task_btf();
+ regs = (struct pt_regs *) bpf_task_pt_regs(current);
+ __builtin_memcpy(&current_regs, regs, sizeof(*regs));
+ __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx));
+
+ /* Prove that uprobe was run */
+ uprobe_res = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
index 66690778e36d..718f59692ccb 100755
--- a/tools/testing/selftests/bpf/test_bpftool.sh
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -2,4 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2020 SUSE LLC.
+# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
+SCRIPT_DIR=$(dirname $(realpath $0))
+
+# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
+BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
+export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -22,7 +22,7 @@ KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
cd $KDIR_ROOT_DIR
if [ ! -e tools/bpf/bpftool/Makefile ]; then
echo -e "skip: bpftool files not found!\n"
- exit 0
+ exit 4 # KSFT_SKIP=4
fi
ERROR=0
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
index ed12111cd2f0..679cf968c7d1 100755
--- a/tools/testing/selftests/bpf/test_doc_build.sh
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -4,11 +4,17 @@ set -e
# Assume script is located under tools/testing/selftests/bpf/. We want to start
# build attempts from the top of kernel repository.
-SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_PATH=$(realpath $0)
SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
-KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+KDIR_ROOT_DIR=$(realpath $SCRIPT_REL_DIR/../../../../)
+SCRIPT_REL_DIR=$(dirname $(realpath --relative-to=$KDIR_ROOT_DIR $SCRIPT_REL_PATH))
cd $KDIR_ROOT_DIR
+if [ ! -e $PWD/$SCRIPT_REL_DIR/Makefile ]; then
+ echo -e "skip: bpftool files not found!\n"
+ exit 4 # KSFT_SKIP=4
+fi
+
for tgt in docs docs-clean; do
make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
done
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 14cea869235b..c7a36a9378f8 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -985,7 +985,7 @@ static void test_sockmap(unsigned int tasks, void *data)
FD_ZERO(&w);
FD_SET(sfd[3], &w);
- to.tv_sec = 1;
+ to.tv_sec = 30;
to.tv_usec = 0;
s = select(sfd[3] + 1, &w, NULL, NULL, &to);
if (s == -1) {
@@ -1396,15 +1396,22 @@ static void test_map_stress(void)
#define DO_DELETE 0
#define MAP_RETRIES 20
+#define MAX_DELAY_US 50000
+#define MIN_DELAY_RANGE_US 5000
static int map_update_retriable(int map_fd, const void *key, const void *value,
int flags, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_update_elem(map_fd, key, value, flags)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
@@ -1413,11 +1420,16 @@ static int map_update_retriable(int map_fd, const void *key, const void *value,
static int map_delete_retriable(int map_fd, const void *key, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_delete_elem(map_fd, key)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6f103106a39b..cc1cd240445d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -13,6 +13,28 @@
#include <execinfo.h> /* backtrace */
#include <linux/membarrier.h>
+/* Adapted from perf/util/string.c */
+static bool glob_match(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_match(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
#define EXIT_NO_TEST 2
#define EXIT_ERR_SETUP_INFRA 3
@@ -55,12 +77,12 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
int i;
for (i = 0; i < sel->blacklist.cnt; i++) {
- if (strstr(name, sel->blacklist.strs[i]))
+ if (glob_match(name, sel->blacklist.strs[i]))
return false;
}
for (i = 0; i < sel->whitelist.cnt; i++) {
- if (strstr(name, sel->whitelist.strs[i]))
+ if (glob_match(name, sel->whitelist.strs[i]))
return true;
}
@@ -148,18 +170,18 @@ void test__end_subtest()
struct prog_test_def *test = env.test;
int sub_error_cnt = test->error_cnt - test->old_error_cnt;
+ dump_test_log(test, sub_error_cnt);
+
+ fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
+ test->test_num, test->subtest_num, test->test_name, test->subtest_name,
+ sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
if (sub_error_cnt)
env.fail_cnt++;
else if (test->skip_cnt == 0)
env.sub_succ_cnt++;
skip_account();
- dump_test_log(test, sub_error_cnt);
-
- fprintf(env.stdout, "#%d/%d %s:%s\n",
- test->test_num, test->subtest_num, test->subtest_name,
- sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -450,6 +472,8 @@ enum ARG_KEYS {
ARG_VERBOSE = 'v',
ARG_GET_TEST_CNT = 'c',
ARG_LIST_TEST_NAMES = 'l',
+ ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
+ ARG_TEST_NAME_GLOB_DENYLIST = 'd',
};
static const struct argp_option opts[] = {
@@ -467,6 +491,10 @@ static const struct argp_option opts[] = {
"Get number of selected top-level tests " },
{ "list", ARG_LIST_TEST_NAMES, NULL, 0,
"List test names that would run (without running them) " },
+ { "allow", ARG_TEST_NAME_GLOB_ALLOWLIST, "NAMES", 0,
+ "Run tests with name matching the pattern (supports '*' wildcard)." },
+ { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
+ "Don't run tests with name matching the pattern (supports '*' wildcard)." },
{},
};
@@ -491,36 +519,48 @@ static void free_str_set(const struct str_set *set)
free(set->strs);
}
-static int parse_str_list(const char *s, struct str_set *set)
+static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
{
char *input, *state = NULL, *next, **tmp, **strs = NULL;
- int cnt = 0;
+ int i, cnt = 0;
input = strdup(s);
if (!input)
return -ENOMEM;
- set->cnt = 0;
- set->strs = NULL;
-
while ((next = strtok_r(state ? NULL : input, ",", &state))) {
tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
if (!tmp)
goto err;
strs = tmp;
- strs[cnt] = strdup(next);
- if (!strs[cnt])
- goto err;
+ if (is_glob_pattern) {
+ strs[cnt] = strdup(next);
+ if (!strs[cnt])
+ goto err;
+ } else {
+ strs[cnt] = malloc(strlen(next) + 2 + 1);
+ if (!strs[cnt])
+ goto err;
+ sprintf(strs[cnt], "*%s*", next);
+ }
cnt++;
}
- set->cnt = cnt;
- set->strs = (const char **)strs;
+ tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
+ if (!tmp)
+ goto err;
+ memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
+ set->strs = (const char **)tmp;
+ set->cnt += cnt;
+
free(input);
+ free(strs);
return 0;
err:
+ for (i = 0; i < cnt; i++)
+ free(strs[i]);
free(strs);
free(input);
return -ENOMEM;
@@ -553,29 +593,35 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
break;
}
+ case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.whitelist))
+ &env->subtest_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.whitelist))
+ if (parse_str_list(arg, &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
break;
}
+ case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.blacklist))
+ &env->subtest_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.blacklist))
+ if (parse_str_list(arg, &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
break;
}
@@ -755,7 +801,7 @@ int main(int argc, char **argv)
save_netns();
stdio_hijack();
env.has_testmod = true;
- if (load_bpf_testmod()) {
+ if (!env.list_test_names && load_bpf_testmod()) {
fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
env.has_testmod = false;
}
@@ -786,24 +832,25 @@ int main(int argc, char **argv)
test__end_subtest();
test->tested = true;
- if (test->error_cnt)
- env.fail_cnt++;
- else
- env.succ_cnt++;
- skip_account();
dump_test_log(test, test->error_cnt);
fprintf(env.stdout, "#%d %s:%s\n",
test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : "OK");
+ test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
+ if (test->error_cnt)
+ env.fail_cnt++;
+ else
+ env.succ_cnt++;
+ skip_account();
reset_affinity();
restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
- if (env.has_testmod)
+ if (!env.list_test_names && env.has_testmod)
unload_bpf_testmod();
stdio_restore();
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 46633a3bfb0b..cd7bf32e6a17 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -63,14 +63,11 @@
# ----------------
# Must run with CAP_NET_ADMIN capability.
#
-# Run (full color-coded output):
-# sudo ./test_xsk.sh -c
+# Run:
+# sudo ./test_xsk.sh
#
# If running from kselftests:
-# sudo make colorconsole=1 run_tests
-#
-# Run (full output without color-coding):
-# sudo ./test_xsk.sh
+# sudo make run_tests
#
# Run with verbose output:
# sudo ./test_xsk.sh -v
@@ -83,7 +80,6 @@
while getopts "cvD" flag
do
case "${flag}" in
- c) colorconsole=1;;
v) verbose=1;;
D) dump_pkts=1;;
esac
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 1bbd1d9830c8..e7a19b04d4ea 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -136,3 +136,90 @@ void read_trace_pipe(void)
}
}
}
+
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ u32 *insn = (u32 *)(uintptr_t)addr;
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (ssize_t)(insn + 2) - base;
+ else
+ return (uintptr_t)addr - base;
+}
+
+#else
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ return (uintptr_t)addr - base;
+}
+
+#endif
+
+ssize_t get_base_addr(void)
+{
+ size_t start, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ &start, buf, &offset) == 3) {
+ if (strcmp(buf, "r-xp") == 0) {
+ fclose(f);
+ return start - offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
+
+ssize_t get_rel_offset(uintptr_t addr)
+{
+ size_t start, end, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
+ if (addr >= start && addr < end) {
+ fclose(f);
+ return (size_t)addr - start + offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index f62fdef9e589..d907b445524d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -18,4 +18,8 @@ int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
+ssize_t get_uprobe_offset(const void *addr, ssize_t base);
+ssize_t get_base_addr(void);
+ssize_t get_rel_offset(uintptr_t addr);
+
#endif
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 1135fb980814..f53ce2683f8d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -70,7 +70,6 @@
#include <errno.h>
#include <getopt.h>
#include <asm/barrier.h>
-typedef __u16 __sum16;
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
@@ -106,14 +105,9 @@ static const u16 UDP_PORT2 = 2121;
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- if (configured_mode == TEST_MODE_UNCONFIGURED) {
- ksft_exit_fail_msg
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- } else {
- ksft_test_result_fail
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- ksft_exit_xfail();
- }
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+ strerror(error));
+ ksft_exit_xfail();
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
@@ -126,7 +120,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int
test_type == TEST_TYPE_STATS ? "Stats" : "",\
test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
-static void *memset32_htonl(void *dest, u32 val, u32 size)
+static void memset32_htonl(void *dest, u32 val, u32 size)
{
u32 *ptr = (u32 *)dest;
int i;
@@ -135,11 +129,6 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
for (i = 0; i < (size & (~0x3)); i += 4)
ptr[i >> 2] = val;
-
- for (; i < size; i++)
- ((char *)dest)[i] = ((char *)&val)[i & 3];
-
- return dest;
}
/*
@@ -230,13 +219,13 @@ static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
ip_hdr->check = 0;
}
-static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject,
+static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
struct udphdr *udp_hdr)
{
udp_hdr->source = htons(ifobject->src_port);
udp_hdr->dest = htons(ifobject->dst_port);
udp_hdr->len = htons(UDP_PKT_SIZE);
- memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE);
+ memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
}
static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
@@ -246,12 +235,7 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
}
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
- memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
-}
-
-static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
{
struct xsk_umem_config cfg = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -260,7 +244,6 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
.frame_headroom = frame_headroom,
.flags = XSK_UMEM__DEFAULT_FLAGS
};
- int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
struct xsk_umem_info *umem;
int ret;
@@ -271,7 +254,7 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
ret = xsk_umem__create(&umem->umem, buffer, size,
&umem->fq, &umem->cq, &cfg);
if (ret)
- exit_with_error(ret);
+ exit_with_error(-ret);
umem->buffer = buffer;
@@ -285,7 +268,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(ret);
+ exit_with_error(-ret);
for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
@@ -333,20 +316,19 @@ static struct option long_options[] = {
{"queue", optional_argument, 0, 'q'},
{"dump-pkts", optional_argument, 0, 'D'},
{"verbose", no_argument, 0, 'v'},
- {"tx-pkt-count", optional_argument, 0, 'C'},
{0, 0, 0, 0}
};
static void usage(const char *prog)
{
const char *str =
- " Usage: %s [OPTIONS]\n"
- " Options:\n"
- " -i, --interface Use interface\n"
- " -q, --queue=n Use queue n (default 0)\n"
- " -D, --dump-pkts Dump packets L2 - L5\n"
- " -v, --verbose Verbose output\n"
- " -C, --tx-pkt-count=n Number of packets to send\n";
+ " Usage: %s [OPTIONS]\n"
+ " Options:\n"
+ " -i, --interface Use interface\n"
+ " -q, --queue=n Use queue n (default 0)\n"
+ " -D, --dump-pkts Dump packets L2 - L5\n"
+ " -v, --verbose Verbose output\n";
+
ksft_print_msg(str, prog);
}
@@ -392,7 +374,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
if (c == -1)
break;
@@ -413,13 +395,10 @@ static void parse_command_line(int argc, char **argv)
interface_index++;
break;
case 'D':
- debug_pkt_dump = 1;
- break;
- case 'C':
- opt_pkt_count = atoi(optarg);
+ opt_pkt_dump = true;
break;
case 'v':
- opt_verbose = 1;
+ opt_verbose = true;
break;
default:
usage(basename(argv[0]));
@@ -427,17 +406,143 @@ static void parse_command_line(int argc, char **argv)
}
}
- if (!opt_pkt_count) {
- print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
- opt_pkt_count = DEFAULT_PKT_CNT;
- }
-
if (!validate_interfaces()) {
usage(basename(argv[0]));
ksft_exit_xfail();
}
}
+static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+{
+ if (pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_nb];
+}
+
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = malloc(sizeof(*pkt_stream));
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->nb_pkts = nb_pkts;
+ for (i = 0; i < nb_pkts; i++) {
+ pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ pkt_stream->pkts[i].len = pkt_len;
+ pkt_stream->pkts[i].payload = i;
+ }
+
+ return pkt_stream;
+}
+
+static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+{
+ struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
+ struct udphdr *udp_hdr;
+ struct ethhdr *eth_hdr;
+ struct iphdr *ip_hdr;
+ void *data;
+
+ if (!pkt)
+ return NULL;
+
+ data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
+ udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+ ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+ eth_hdr = (struct ethhdr *)data;
+
+ gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
+ gen_ip_hdr(ifobject, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr(ifobject, eth_hdr);
+
+ return pkt;
+}
+
+static void pkt_dump(void *pkt, u32 len)
+{
+ char s[INET_ADDRSTRLEN];
+ struct ethhdr *ethhdr;
+ struct udphdr *udphdr;
+ struct iphdr *iphdr;
+ int payload, i;
+
+ ethhdr = pkt;
+ iphdr = pkt + sizeof(*ethhdr);
+ udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
+
+ /*extract L2 frame */
+ fprintf(stdout, "DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+ fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+ /*extract L3 frame */
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
+ /*extract L4 frame */
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
+ /*extract L5 frame */
+ payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
+
+ fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+ fprintf(stdout, "---------------------------------------\n");
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
+{
+ void *data = xsk_umem__get_data(buffer, desc->addr);
+ struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+
+ if (!pkt) {
+ ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
+ return false;
+ }
+
+ if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
+ u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+
+ if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
+ pkt_dump(data, PKT_SIZE);
+
+ if (pkt->len != desc->len) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected length [%d], got length [%d]\n",
+ __func__, pkt->len, desc->len);
+ return false;
+ }
+
+ if (pkt->payload != seqnum) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
+ __func__, pkt->payload, seqnum);
+ return false;
+ }
+ } else {
+ ksft_print_msg("Invalid frame received: ");
+ ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
+ iphdr->tos);
+ return false;
+ }
+
+ return true;
+}
+
static void kick_tx(struct xsk_socket_info *xsk)
{
int ret;
@@ -448,7 +553,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -463,133 +568,108 @@ static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
if (rcvd) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
}
}
-static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
+ struct pollfd *fds)
{
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_fq = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
+ struct pkt *pkt;
int ret;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(ret);
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ while (pkt) {
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ continue;
}
- return;
- }
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
if (ret < 0)
- exit_with_error(ret);
+ exit_with_error(-ret);
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- }
-
- for (i = 0; i < rcvd; i++) {
- u64 addr, orig;
-
- addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
- xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
- pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
- if (!pkt_node_rx)
- exit_with_error(errno);
+ for (i = 0; i < rcvd; i++) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
- pkt_node_rx->pkt_frame = malloc(PKT_SIZE);
- if (!pkt_node_rx->pkt_frame)
- exit_with_error(errno);
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+ if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
+ return;
- memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
- PKT_SIZE);
-
- TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ }
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+ xsk_ring_cons__release(&xsk->rx, rcvd);
}
-
- xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
}
-static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
{
- u32 idx = 0;
- unsigned int i;
- bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
- u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
+ struct xsk_socket_info *xsk = ifobject->xsk;
+ u32 i, idx;
- while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
- complete_tx_only(xsk, batch_size);
+ while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
+ complete_pkts(xsk, BATCH_SIZE);
- for (i = 0; i < batch_size; i++) {
+ for (i = 0; i < BATCH_SIZE; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+ struct pkt *pkt = pkt_generate(ifobject, pkt_nb);
- tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = len;
- }
+ if (!pkt)
+ break;
- xsk_ring_prod__submit(&xsk->tx, batch_size);
- if (!tx_invalid_test) {
- xsk->outstanding_tx += batch_size;
- } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
- kick_tx(xsk);
+ tx_desc->addr = pkt->addr;
+ tx_desc->len = pkt->len;
+ pkt_nb++;
}
- *frameptr += batch_size;
- *frameptr %= num_frames;
- complete_tx_only(xsk, batch_size);
-}
-
-static int get_batch_size(int pkt_cnt)
-{
- if (!opt_pkt_count)
- return BATCH_SIZE;
- if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
- return BATCH_SIZE;
+ xsk_ring_prod__submit(&xsk->tx, i);
+ if (stat_test_type != STAT_TEST_TX_INVALID)
+ xsk->outstanding_tx += i;
+ else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
+ complete_pkts(xsk, i);
- return opt_pkt_count - pkt_cnt;
+ return i;
}
-static void complete_tx_only_all(struct ifobject *ifobject)
+static void wait_for_tx_completion(struct xsk_socket_info *xsk)
{
- bool pending;
-
- do {
- pending = false;
- if (ifobject->xsk->outstanding_tx) {
- complete_tx_only(ifobject->xsk, BATCH_SIZE);
- pending = !!ifobject->xsk->outstanding_tx;
- }
- } while (pending);
+ while (xsk->outstanding_tx)
+ complete_pkts(xsk, BATCH_SIZE);
}
-static void tx_only_all(struct ifobject *ifobject)
+static void send_pkts(struct ifobject *ifobject)
{
struct pollfd fds[MAX_SOCKS] = { };
- u32 frame_nb = 0;
- int pkt_cnt = 0;
- int ret;
+ u32 pkt_cnt = 0;
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLOUT;
- while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
- int batch_size = get_batch_size(pkt_cnt);
+ while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+ u32 sent;
if (test_type == TEST_TYPE_POLL) {
+ int ret;
+
ret = poll(fds, 1, POLL_TMOUT);
if (ret <= 0)
continue;
@@ -598,78 +678,30 @@ static void tx_only_all(struct ifobject *ifobject)
continue;
}
- tx_only(ifobject->xsk, &frame_nb, batch_size);
- pkt_cnt += batch_size;
+ sent = __send_pkts(ifobject, pkt_cnt);
+ pkt_cnt += sent;
+ usleep(10);
}
- if (opt_pkt_count)
- complete_tx_only_all(ifobject);
+ wait_for_tx_completion(ifobject->xsk);
}
-static void worker_pkt_dump(void)
-{
- struct ethhdr *ethhdr;
- struct iphdr *iphdr;
- struct udphdr *udphdr;
- char s[128];
- int payload;
- void *ptr;
-
- fprintf(stdout, "---------------------------------------\n");
- for (int iter = 0; iter < num_frames - 1; iter++) {
- ptr = pkt_buf[iter]->payload;
- ethhdr = ptr;
- iphdr = ptr + sizeof(*ethhdr);
- udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
-
- /*extract L2 frame */
- fprintf(stdout, "DEBUG>> L2: dst mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
- fprintf(stdout, "\nDEBUG>> L2: src mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
- /*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
- inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
- inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
- /*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
- /*extract L5 frame */
- payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
-
- if (payload == EOT) {
- print_verbose("End-of-transmission frame received\n");
- fprintf(stdout, "---------------------------------------\n");
- break;
- }
- fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
- fprintf(stdout, "---------------------------------------\n");
- }
-}
-
-static void worker_stats_validate(struct ifobject *ifobject)
+static bool rx_stats_are_valid(struct ifobject *ifobject)
{
+ u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
struct xdp_statistics stats;
socklen_t optlen;
int err;
- struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
- ifdict[!ifobject->ifdict_index]->xsk->xsk :
- ifobject->xsk->xsk;
- int fd = xsk_socket__fd(xsk);
- unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
-
- sigvar = 0;
optlen = sizeof(stats);
err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
- if (err)
- return;
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return true;
+ }
if (optlen == sizeof(struct xdp_statistics)) {
switch (stat_test_type) {
@@ -677,8 +709,7 @@ static void worker_stats_validate(struct ifobject *ifobject)
xsk_stat = stats.rx_dropped;
break;
case STAT_TEST_TX_INVALID:
- xsk_stat = stats.tx_invalid_descs;
- break;
+ return true;
case STAT_TEST_RX_FULL:
xsk_stat = stats.rx_ring_full;
expected_stat -= RX_FULL_RXQSIZE;
@@ -691,99 +722,70 @@ static void worker_stats_validate(struct ifobject *ifobject)
}
if (xsk_stat == expected_stat)
- sigvar = 1;
+ return true;
}
+
+ return false;
}
-static void worker_pkt_validate(void)
+static void tx_stats_validate(struct ifobject *ifobject)
{
- u32 payloadseqnum = -2;
- struct iphdr *iphdr;
-
- while (1) {
- pkt_node_rx_q = TAILQ_LAST(&head, head_s);
- if (!pkt_node_rx_q)
- break;
-
- iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr));
-
- /*do not increment pktcounter if !(tos=0x9 and ipv4) */
- if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
- payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
- if (debug_pkt_dump && payloadseqnum != EOT) {
- pkt_obj = malloc(sizeof(*pkt_obj));
- pkt_obj->payload = malloc(PKT_SIZE);
- memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
- pkt_buf[payloadseqnum] = pkt_obj;
- }
-
- if (payloadseqnum == EOT) {
- print_verbose("End-of-transmission frame received: PASS\n");
- sigvar = 1;
- break;
- }
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
- if (prev_pkt + 1 != payloadseqnum) {
- ksft_test_result_fail
- ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
- __func__, prev_pkt, payloadseqnum);
- ksft_exit_xfail();
- }
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return;
+ }
- prev_pkt = payloadseqnum;
- pkt_counter++;
- } else {
- ksft_print_msg("Invalid frame received: ");
- ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
- iphdr->tos);
- }
+ if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
+ return;
- TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
- free(pkt_node_rx_q->pkt_frame);
- free(pkt_node_rx_q);
- pkt_node_rx_q = NULL;
- }
+ ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+ __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
}
static void thread_common_ops(struct ifobject *ifobject, void *bufs)
{
- int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ size_t mmap_sz = umem_sz;
int ctr = 0;
int ret;
ifobject->ns_fd = switch_namespace(ifobject->nsname);
if (test_type == TEST_TYPE_BPF_RES)
- umem_sz *= 2;
+ mmap_sz *= 2;
- bufs = mmap(NULL, umem_sz,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (bufs == MAP_FAILED)
exit_with_error(errno);
- xsk_configure_umem(ifobject, bufs, 0);
- ifobject->umem = ifobject->umem_arr[0];
- ret = xsk_configure_socket(ifobject, 0);
-
- /* Retry Create Socket if it fails as xsk_socket__create()
- * is asynchronous
- */
- while (ret && ctr < SOCK_RECONF_CTR) {
- xsk_configure_umem(ifobject, bufs, 0);
+ while (ctr++ < SOCK_RECONF_CTR) {
+ xsk_configure_umem(ifobject, bufs, umem_sz, 0);
ifobject->umem = ifobject->umem_arr[0];
ret = xsk_configure_socket(ifobject, 0);
+ if (!ret)
+ break;
+
+ /* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
usleep(USLEEP_MAX);
- ctr++;
+ if (ctr >= SOCK_RECONF_CTR)
+ exit_with_error(-ret);
}
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(ret);
-
ifobject->umem = ifobject->umem_arr[0];
ifobject->xsk = ifobject->xsk_arr[0];
if (test_type == TEST_TYPE_BPF_RES) {
- xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+ xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
ifobject->umem = ifobject->umem_arr[1];
ret = xsk_configure_socket(ifobject, 1);
}
@@ -809,33 +811,18 @@ static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
static void *worker_testapp_validate_tx(void *arg)
{
- struct udphdr *udp_hdr =
- (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
struct ifobject *ifobject = (struct ifobject *)arg;
- struct generic_data data;
void *bufs = NULL;
if (!second_step)
thread_common_ops(ifobject, bufs);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
+ print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
+ ifobject->ifname);
+ send_pkts(ifobject);
- print_verbose("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ tx_stats_validate(ifobject);
testapp_cleanup_xsk_res(ifobject);
pthread_exit(NULL);
@@ -853,31 +840,16 @@ static void *worker_testapp_validate_rx(void *arg)
if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
xsk_populate_fill_ring(ifobject->umem);
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
-
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLIN;
pthread_barrier_wait(&barr);
- while (1) {
- if (test_type != TEST_TYPE_STATS) {
- rx_pkt(ifobject->xsk, fds);
- worker_pkt_validate();
- } else {
- worker_stats_validate(ifobject);
- }
- if (sigvar)
- break;
- }
-
- print_verbose("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
+ if (test_type == TEST_TYPE_STATS)
+ while (!rx_stats_are_valid(ifobject))
+ continue;
+ else
+ receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
if (test_type == TEST_TYPE_TEARDOWN)
print_verbose("Destroying socket\n");
@@ -890,10 +862,18 @@ static void testapp_validate(void)
{
bool bidi = test_type == TEST_TYPE_BIDI;
bool bpf = test_type == TEST_TYPE_BPF_RES;
+ struct pkt_stream *pkt_stream;
if (pthread_barrier_init(&barr, NULL, 2))
exit_with_error(errno);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+ else
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
+ ifdict_tx->pkt_stream = pkt_stream;
+ ifdict_rx->pkt_stream = pkt_stream;
+
/*Spawn RX thread */
pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
@@ -907,15 +887,6 @@ static void testapp_validate(void)
pthread_join(t1, NULL);
pthread_join(t0, NULL);
- if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
- worker_pkt_dump();
- for (int iter = 0; iter < num_frames - 1; iter++) {
- free(pkt_buf[iter]->payload);
- free(pkt_buf[iter]);
- }
- free(pkt_buf);
- }
-
if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
print_ksft_result();
}
@@ -925,9 +896,6 @@ static void testapp_teardown(void)
int i;
for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
}
@@ -953,9 +921,6 @@ static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
static void testapp_bidi(void)
{
for (int i = 0; i < MAX_BIDI_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step) {
@@ -987,9 +952,6 @@ static void testapp_bpf_res(void)
int i;
for (i = 0; i < MAX_BPF_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step)
@@ -1017,6 +979,8 @@ static void testapp_stats(void)
case STAT_TEST_RX_FULL:
rxqsize = RX_FULL_RXQSIZE;
break;
+ case STAT_TEST_TX_INVALID:
+ continue;
default:
break;
}
@@ -1062,10 +1026,7 @@ static void run_pkt_test(int mode, int type)
/* reset defaults after potential previous test */
xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
- pkt_counter = 0;
second_step = 0;
- prev_pkt = -1;
- sigvar = 0;
stat_test_type = -1;
rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
@@ -1102,62 +1063,70 @@ static void run_pkt_test(int mode, int type)
}
}
+static struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+ if (!ifobj->umem_arr)
+ goto out_umem_arr;
+
+ return ifobj;
+
+out_umem_arr:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+static void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem_arr);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
+
int main(int argc, char **argv)
{
struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
- bool failure = false;
int i, j;
if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
exit_with_error(errno);
- for (int i = 0; i < MAX_INTERFACES; i++) {
- ifdict[i] = malloc(sizeof(struct ifobject));
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ ifdict[i] = ifobject_create();
if (!ifdict[i])
- exit_with_error(errno);
-
- ifdict[i]->ifdict_index = i;
- ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
- if (!ifdict[i]->xsk_arr) {
- failure = true;
- goto cleanup;
- }
- ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
- if (!ifdict[i]->umem_arr) {
- failure = true;
- goto cleanup;
- }
+ exit_with_error(ENOMEM);
}
setlocale(LC_ALL, "");
parse_command_line(argc, argv);
- num_frames = ++opt_pkt_count;
-
- init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
- init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
+ init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+ init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
- for (i = 0; i < TEST_MODE_MAX; i++) {
- for (j = 0; j < TEST_TYPE_MAX; j++)
+ for (i = 0; i < TEST_MODE_MAX; i++)
+ for (j = 0; j < TEST_TYPE_MAX; j++) {
run_pkt_test(i, j);
- }
-
-cleanup:
- for (int i = 0; i < MAX_INTERFACES; i++) {
- if (ifdict[i]->ns_fd != -1)
- close(ifdict[i]->ns_fd);
- free(ifdict[i]->xsk_arr);
- free(ifdict[i]->umem_arr);
- free(ifdict[i]);
- }
+ usleep(USLEEP_MAX);
+ }
- if (failure)
- exit_with_error(errno);
+ for (i = 0; i < MAX_INTERFACES; i++)
+ ifobject_delete(ifdict[i]);
ksft_exit_pass();
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 6c428b276ab6..7e49b9fbe25e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -34,28 +34,23 @@
#define IP_PKT_TOS 0x9
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define EOT (-1)
-#define USLEEP_MAX 200000
+#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
+#define BATCH_SIZE 8
#define POLL_TMOUT 1000
-#define DEFAULT_PKT_CNT 10000
+#define DEFAULT_PKT_CNT (4 * 1024)
#define RX_FULL_RXQSIZE 32
+#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8 u8;
-
-enum TEST_MODES {
- TEST_MODE_UNCONFIGURED = -1,
+enum test_mode {
TEST_MODE_SKB,
TEST_MODE_DRV,
TEST_MODE_MAX
};
-enum TEST_TYPES {
+enum test_type {
TEST_TYPE_NOPOLL,
TEST_TYPE_POLL,
TEST_TYPE_TEARDOWN,
@@ -65,7 +60,7 @@ enum TEST_TYPES {
TEST_TYPE_MAX
};
-enum STAT_TEST_TYPES {
+enum stat_test_type {
STAT_TEST_RX_DROPPED,
STAT_TEST_TX_INVALID,
STAT_TEST_RX_FULL,
@@ -73,21 +68,16 @@ enum STAT_TEST_TYPES {
STAT_TEST_TYPE_MAX
};
-static int configured_mode = TEST_MODE_UNCONFIGURED;
-static u8 debug_pkt_dump;
-static u32 num_frames;
+static int configured_mode;
+static bool opt_pkt_dump;
+static u32 num_frames = DEFAULT_PKT_CNT / 4;
static bool second_step;
static int test_type;
-static int opt_pkt_count;
-static u8 opt_verbose;
+static bool opt_verbose;
static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-static u32 pkt_counter;
-static long prev_pkt = -1;
-static int sigvar;
static int stat_test_type;
static u32 rxqsize;
static u32 frame_headroom;
@@ -104,10 +94,6 @@ struct xsk_socket_info {
struct xsk_ring_prod tx;
struct xsk_umem_info *umem;
struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
u32 outstanding_tx;
};
@@ -118,8 +104,15 @@ struct flow_vector {
} vector;
};
-struct generic_data {
- u32 seqnum;
+struct pkt {
+ u64 addr;
+ u32 len;
+ u32 payload;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ struct pkt *pkts;
};
struct ifobject {
@@ -131,8 +124,8 @@ struct ifobject {
struct xsk_umem_info *umem;
void *(*func_ptr)(void *arg);
struct flow_vector fv;
+ struct pkt_stream *pkt_stream;
int ns_fd;
- int ifdict_index;
u32 dst_ip;
u32 src_ip;
u16 src_port;
@@ -149,18 +142,4 @@ static struct ifobject *ifdict_tx;
pthread_barrier_t barr;
pthread_t t0, t1;
-TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
-struct head_s *head_p;
-struct pkt {
- char *pkt_frame;
-
- TAILQ_ENTRY(pkt) pkt_nodes;
-} *pkt_node_rx, *pkt_node_rx_q;
-
-struct pkt_frame {
- char *payload;
-} *pkt_obj;
-
-struct pkt_frame **pkt_buf;
-
#endif /* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index dac1c5f78752..bf29d2549bee 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,14 +8,8 @@ ksft_xfail=2
ksft_xpass=3
ksft_skip=4
-GREEN='\033[0;92m'
-YELLOW='\033[0;93m'
-RED='\033[0;31m'
-NC='\033[0m'
-STACK_LIM=131072
SPECFILE=veth.spec
XSKOBJ=xdpxceiver
-NUMPKTS=10000
validate_root_exec()
{
@@ -50,22 +44,12 @@ validate_veth_spec_file()
test_status()
{
statusval=$1
- if [ -n "${colorconsole+set}" ]; then
- if [ $statusval -eq 2 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
- fi
- else
- if [ $statusval -eq 2 ]; then
- echo -e "$2: [ FAIL ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "$2: [ SKIPPED ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "$2: [ PASS ]"
- fi
+ if [ $statusval -eq 2 ]; then
+ echo -e "$2: [ FAIL ]"
+ elif [ $statusval -eq 1 ]; then
+ echo -e "$2: [ SKIPPED ]"
+ elif [ $statusval -eq 0 ]; then
+ echo -e "$2: [ PASS ]"
fi
}
@@ -107,5 +91,5 @@ validate_ip_utility()
execxdpxceiver()
{
- ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
}