aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/common.c8
-rw-r--r--tools/bpf/bpftool/feature.c4
-rw-r--r--tools/bpf/bpftool/link.c3
-rw-r--r--tools/bpf/bpftool/main.c6
-rw-r--r--tools/bpf/bpftool/main.h2
-rw-r--r--tools/bpf/bpftool/map.c2
-rw-r--r--tools/bpf/bpftool/perf.c112
-rw-r--r--tools/bpf/bpftool/pids.c1
-rw-r--r--tools/bpf/bpftool/prog.c4
-rw-r--r--tools/bpf/bpftool/struct_ops.c2
-rw-r--r--tools/bpf/bpftool/tracelog.c2
-rw-r--r--tools/bpf/runqslower/runqslower.c18
-rw-r--r--tools/include/uapi/asm-generic/socket.h2
-rw-r--r--tools/include/uapi/asm/bpf_perf_event.h2
-rw-r--r--tools/include/uapi/linux/bpf.h12
-rw-r--r--tools/include/uapi/linux/btf.h4
-rw-r--r--tools/lib/bpf/Build3
-rw-r--r--tools/lib/bpf/Makefile2
-rw-r--r--tools/lib/bpf/bpf.c34
-rw-r--r--tools/lib/bpf/bpf_helpers.h7
-rw-r--r--tools/lib/bpf/bpf_tracing.h23
-rw-r--r--tools/lib/bpf/btf.c15
-rw-r--r--tools/lib/bpf/libbpf.c808
-rw-r--r--tools/lib/bpf/libbpf.h123
-rw-r--r--tools/lib/bpf/libbpf.map1
-rw-r--r--tools/lib/bpf/libbpf_internal.h37
-rw-r--r--tools/lib/bpf/relo_core.c104
-rw-r--r--tools/lib/bpf/relo_core.h6
-rw-r--r--tools/lib/bpf/usdt.bpf.h259
-rw-r--r--tools/lib/bpf/usdt.c1518
-rw-r--r--tools/testing/selftests/bpf/Makefile25
-rw-r--r--tools/testing/selftests/bpf/bench.c1
-rw-r--r--tools/testing/selftests/bpf/bpf_rlimit.h28
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.c6
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/helper_restricted.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_funcs.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_fixup.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_strncmp.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c421
-rw-r--r--tools/testing/selftests/bpf/progs/exhandler_kern.c15
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c27
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs1.c15
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs2.c15
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c190
-rw-r--r--tools/testing/selftests/bpf/progs/perf_event_stackmap.c4
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h5
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h4
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600.c11
-rw-r--r--tools/testing/selftests/bpf/progs/skb_load_bytes.c19
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_test.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func17.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_helper_restricted.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_log_fixup.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_urandom_usdt.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c96
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt_multispec.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c12
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c2
-rw-r--r--tools/testing/selftests/bpf/sdt-config.h6
-rw-r--r--tools/testing/selftests/bpf/sdt.h513
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py2
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c7
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c4
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c43
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c70
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py2
-rw-r--r--tools/testing/selftests/bpf/test_progs.c487
-rw-r--r--tools/testing/selftests/bpf/test_progs.h64
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c4
-rw-r--r--tools/testing/selftests/bpf/test_sock.c6
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c4
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c5
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c6
-rw-r--r--tools/testing/selftests/bpf/test_tag.c4
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c4
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c1
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c55
-rw-r--r--tools/testing/selftests/bpf/test_verifier_log.c5
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c91
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h8
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c9
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c63
-rw-r--r--tools/testing/selftests/bpf/urandom_read_aux.c9
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib1.c13
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib2.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c20
-rw-r--r--tools/testing/selftests/bpf/verifier/map_kptr.c469
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c6
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c1
-rw-r--r--tools/testing/selftests/bpf/xdping.c8
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c6
l---------tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_mld.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh1
-rw-r--r--tools/testing/selftests/drivers/net/dsa/forwarding.config2
l---------tools/testing/selftests/drivers/net/dsa/lib.sh1
l---------tools/testing/selftests/drivers/net/dsa/local_termination.sh1
l---------tools/testing/selftests/drivers/net/dsa/no_forwarding.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh341
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_burst.sh480
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/basic_qos.sh253
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/psfp.sh327
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh24
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh103
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/net/forwarding/lib.sh133
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh299
-rwxr-xr-xtools/testing/selftests/net/forwarding/no_forwarding.sh261
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_vid_1.sh27
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh235
-rw-r--r--tools/testing/selftests/net/mptcp/config8
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh38
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh232
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c645
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh779
-rwxr-xr-xtools/testing/selftests/net/ndisc_unsolicited_na_test.sh255
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh48
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh50
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh23
157 files changed, 10805 insertions, 899 deletions
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 0c1e06cf50b9..c740142c24d8 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -17,7 +17,6 @@
#include <linux/magic.h>
#include <net/if.h>
#include <sys/mount.h>
-#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
@@ -119,13 +118,6 @@ static bool is_bpffs(char *path)
return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
}
-void set_max_rlimit(void)
-{
- struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
-
- setrlimit(RLIMIT_MEMLOCK, &rinf);
-}
-
static int
mnt_fs(const char *target, const char *type, char *buff, size_t bufflen)
{
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 290998c82de1..be130e35462f 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -567,7 +567,7 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
res = probe_prog_type_ifindex(prog_type, ifindex);
} else {
- res = libbpf_probe_bpf_prog_type(prog_type, NULL);
+ res = libbpf_probe_bpf_prog_type(prog_type, NULL) > 0;
}
#ifdef USE_LIBCAP
@@ -1136,8 +1136,6 @@ static int do_probe(int argc, char **argv)
__u32 ifindex = 0;
char *ifname;
- set_max_rlimit();
-
while (argc) {
if (is_prefix(*argv, "kernel")) {
if (target != COMPONENT_UNSPEC) {
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 97dec81950e5..8fb0116f9136 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -20,6 +20,9 @@ static const char * const link_type_name[] = {
[BPF_LINK_TYPE_CGROUP] = "cgroup",
[BPF_LINK_TYPE_ITER] = "iter",
[BPF_LINK_TYPE_NETNS] = "netns",
+ [BPF_LINK_TYPE_XDP] = "xdp",
+ [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
};
static struct hashmap *link_table;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index e81227761f5d..9062ef2b8767 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -507,9 +507,9 @@ int main(int argc, char **argv)
* It will still be rejected if users use LIBBPF_STRICT_ALL
* mode for loading generated skeleton.
*/
- ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
- if (ret)
- p_err("failed to enable libbpf strict mode: %d", ret);
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
+ } else {
+ libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);
}
argc -= optind;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6e9277ffc68c..aa99ffab451a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -102,8 +102,6 @@ int detect_common_prefix(const char *arg, ...);
void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
void usage(void) __noreturn;
-void set_max_rlimit(void);
-
int mount_tracefs(const char *target);
struct obj_ref {
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index c26378f20831..877387ef79c7 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1342,8 +1342,6 @@ static int do_create(int argc, char **argv)
goto exit;
}
- set_max_rlimit();
-
fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr);
if (fd < 0) {
p_err("map create failed: %s", strerror(errno));
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index 50de087b0db7..226ec2c39052 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -11,7 +11,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
-#include <ftw.h>
+#include <dirent.h>
#include <bpf/bpf.h>
@@ -147,81 +147,83 @@ static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
}
}
-static int show_proc(const char *fpath, const struct stat *sb,
- int tflag, struct FTW *ftwbuf)
+static int show_proc(void)
{
+ struct dirent *proc_de, *pid_fd_de;
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
- int err, pid = 0, fd = 0;
+ DIR *proc, *pid_fd;
+ int err, pid, fd;
const char *pch;
char buf[4096];
- /* prefix always /proc */
- pch = fpath + 5;
- if (*pch == '\0')
- return 0;
+ proc = opendir("/proc");
+ if (!proc)
+ return -1;
- /* pid should be all numbers */
- pch++;
- while (isdigit(*pch)) {
- pid = pid * 10 + *pch - '0';
- pch++;
- }
- if (*pch == '\0')
- return 0;
- if (*pch != '/')
- return FTW_SKIP_SUBTREE;
-
- /* check /proc/<pid>/fd directory */
- pch++;
- if (strncmp(pch, "fd", 2))
- return FTW_SKIP_SUBTREE;
- pch += 2;
- if (*pch == '\0')
- return 0;
- if (*pch != '/')
- return FTW_SKIP_SUBTREE;
-
- /* check /proc/<pid>/fd/<fd_num> */
- pch++;
- while (isdigit(*pch)) {
- fd = fd * 10 + *pch - '0';
- pch++;
- }
- if (*pch != '\0')
- return FTW_SKIP_SUBTREE;
+ while ((proc_de = readdir(proc))) {
+ pid = 0;
+ pch = proc_de->d_name;
- /* query (pid, fd) for potential perf events */
- len = sizeof(buf);
- err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
- &probe_offset, &probe_addr);
- if (err < 0)
- return 0;
+ /* pid should be all numbers */
+ while (isdigit(*pch)) {
+ pid = pid * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch != '\0')
+ continue;
- if (json_output)
- print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
- probe_addr);
- else
- print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
- probe_addr);
+ err = snprintf(buf, sizeof(buf), "/proc/%s/fd", proc_de->d_name);
+ if (err < 0 || err >= (int)sizeof(buf))
+ continue;
+
+ pid_fd = opendir(buf);
+ if (!pid_fd)
+ continue;
+ while ((pid_fd_de = readdir(pid_fd))) {
+ fd = 0;
+ pch = pid_fd_de->d_name;
+
+ /* fd should be all numbers */
+ while (isdigit(*pch)) {
+ fd = fd * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch != '\0')
+ continue;
+
+ /* query (pid, fd) for potential perf events */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(pid, fd, 0, buf, &len,
+ &prog_id, &fd_type,
+ &probe_offset, &probe_addr);
+ if (err < 0)
+ continue;
+
+ if (json_output)
+ print_perf_json(pid, fd, prog_id, fd_type, buf,
+ probe_offset, probe_addr);
+ else
+ print_perf_plain(pid, fd, prog_id, fd_type, buf,
+ probe_offset, probe_addr);
+ }
+ closedir(pid_fd);
+ }
+ closedir(proc);
return 0;
}
static int do_show(int argc, char **argv)
{
- int flags = FTW_ACTIONRETVAL | FTW_PHYS;
- int err = 0, nopenfd = 16;
+ int err;
if (!has_perf_query_support())
return -1;
if (json_output)
jsonw_start_array(json_wtr);
- if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
- p_err("%s", strerror(errno));
- err = -1;
- }
+ err = show_proc();
if (json_output)
jsonw_end_array(json_wtr);
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index bb6c969a114a..e2d00d3cd868 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -108,7 +108,6 @@ int build_obj_refs_table(struct hashmap **map, enum bpf_obj_type type)
p_err("failed to create hashmap for PID references");
return -1;
}
- set_max_rlimit();
skel = pid_iter_bpf__open();
if (!skel) {
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index bc4e05542c2b..5c2c63df92e8 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -68,6 +68,7 @@ const char * const prog_type_name[] = {
[BPF_PROG_TYPE_EXT] = "ext",
[BPF_PROG_TYPE_LSM] = "lsm",
[BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
+ [BPF_PROG_TYPE_SYSCALL] = "syscall",
};
const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
@@ -1603,8 +1604,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
}
}
- set_max_rlimit();
-
if (verifier_logs)
/* log_level1 + log_level2 + stats, but not stable UAPI */
open_opts.kernel_log_level = 1 + 2 + 4;
@@ -2302,7 +2301,6 @@ static int do_profile(int argc, char **argv)
}
}
- set_max_rlimit();
err = profiler_bpf__load(profile_obj);
if (err) {
p_err("failed to load profile_obj");
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index e08a6ff2866c..2535f079ed67 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -501,8 +501,6 @@ static int do_register(int argc, char **argv)
if (libbpf_get_error(obj))
return -1;
- set_max_rlimit();
-
if (bpf_object__load(obj)) {
bpf_object__close(obj);
return -1;
diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c
index e80a5c79b38f..bf1f02212797 100644
--- a/tools/bpf/bpftool/tracelog.c
+++ b/tools/bpf/bpftool/tracelog.c
@@ -9,7 +9,7 @@
#include <string.h>
#include <unistd.h>
#include <linux/magic.h>
-#include <sys/fcntl.h>
+#include <fcntl.h>
#include <sys/vfs.h>
#include "main.h"
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
index d78f4148597f..83c5993a139a 100644
--- a/tools/bpf/runqslower/runqslower.c
+++ b/tools/bpf/runqslower/runqslower.c
@@ -4,7 +4,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/resource.h>
#include <time.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
@@ -88,16 +87,6 @@ int libbpf_print_fn(enum libbpf_print_level level,
return vfprintf(stderr, format, args);
}
-static int bump_memlock_rlimit(void)
-{
- struct rlimit rlim_new = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
-
- return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
-}
-
void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
const struct runq_event *e = data;
@@ -133,11 +122,8 @@ int main(int argc, char **argv)
libbpf_set_print(libbpf_print_fn);
- err = bump_memlock_rlimit();
- if (err) {
- fprintf(stderr, "failed to increase rlimit: %d", err);
- return 1;
- }
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
obj = runqslower_bpf__open();
if (!obj) {
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 77f7c1638eb1..8756df13be50 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -119,6 +119,8 @@
#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_RCVMARK 75
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h
index 39acc149d843..d7dfeab0d71a 100644
--- a/tools/include/uapi/asm/bpf_perf_event.h
+++ b/tools/include/uapi/asm/bpf_perf_event.h
@@ -1,5 +1,7 @@
#if defined(__aarch64__)
#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
+#elif defined(__arc__)
+#include "../../arch/arc/include/uapi/asm/bpf_perf_event.h"
#elif defined(__s390__)
#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
#elif defined(__riscv)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d14b10b85e51..444fe6f1cf35 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5143,6 +5143,17 @@ union bpf_attr {
* The **hash_algo** is returned on success,
* **-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
* invalid arguments are passed.
+ *
+ * void *bpf_kptr_xchg(void *map_value, void *ptr)
+ * Description
+ * Exchange kptr at pointer *map_value* with *ptr*, and return the
+ * old value. *ptr* can be NULL, otherwise it must be a referenced
+ * pointer which will be released when this helper is called.
+ * Return
+ * The old value of kptr (which can be NULL). The returned pointer
+ * if not NULL, is a reference which must be released using its
+ * corresponding release function, or moved into a BPF map before
+ * program exit.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5339,6 +5350,7 @@ union bpf_attr {
FN(copy_from_user_task), \
FN(skb_set_tstamp), \
FN(ima_file_hash), \
+ FN(kptr_xchg), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index b0d8fea1951d..a9162a6c0284 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -33,8 +33,8 @@ struct btf_type {
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
- * bits 24-27: kind (e.g. int, ptr, array...etc)
- * bits 28-30: unused
+ * bits 24-28: kind (e.g. int, ptr, array...etc)
+ * bits 29-30: unused
* bit 31: kind_flag, currently used by
* struct, union and fwd
*/
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 94f0a146bb7b..31a1a9015902 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,4 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
+ usdt.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 064c89e31560..64741c55b8e3 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -239,7 +239,7 @@ install_lib: all_cmd
SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h \
- skel_internal.h libbpf_version.h
+ skel_internal.h libbpf_version.h usdt.bpf.h
GEN_HDRS := $(BPF_GENERATED)
INSTALL_PFX := $(DESTDIR)$(prefix)/include/bpf
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index cf27251adb92..a9d292c106c2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -817,7 +817,7 @@ int bpf_link_create(int prog_fd, int target_fd,
{
__u32 target_btf_id, iter_info_len;
union bpf_attr attr;
- int fd;
+ int fd, err;
if (!OPTS_VALID(opts, bpf_link_create_opts))
return libbpf_err(-EINVAL);
@@ -870,7 +870,37 @@ int bpf_link_create(int prog_fd, int target_fd,
}
proceed:
fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr));
- return libbpf_err_errno(fd);
+ if (fd >= 0)
+ return fd;
+ /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
+ * and other similar programs
+ */
+ err = -errno;
+ if (err != -EINVAL)
+ return libbpf_err(err);
+
+ /* if user used features not supported by
+ * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately
+ */
+ if (attr.link_create.target_fd || attr.link_create.target_btf_id)
+ return libbpf_err(err);
+ if (!OPTS_ZEROED(opts, sz))
+ return libbpf_err(err);
+
+ /* otherwise, for few select kinds of programs that can be
+ * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as
+ * a fallback for older kernels
+ */
+ switch (attach_type) {
+ case BPF_TRACE_RAW_TP:
+ case BPF_LSM_MAC:
+ case BPF_TRACE_FENTRY:
+ case BPF_TRACE_FEXIT:
+ case BPF_MODIFY_RETURN:
+ return bpf_raw_tracepoint_open(NULL, prog_fd);
+ default:
+ return libbpf_err(err);
+ }
}
int bpf_link_detach(int link_fd)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 44df982d2a5c..5de3eb267125 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -149,6 +149,13 @@ enum libbpf_tristate {
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
+#if __has_attribute(btf_type_tag)
+#define __kptr __attribute__((btf_type_tag("kptr")))
+#define __kptr_ref __attribute__((btf_type_tag("kptr_ref")))
+#else
+#define __kptr
+#define __kptr_ref
+#endif
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index e3a8c947e89f..01ce121c302d 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -27,6 +27,9 @@
#elif defined(__TARGET_ARCH_riscv)
#define bpf_target_riscv
#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arc)
+ #define bpf_target_arc
+ #define bpf_target_defined
#else
/* Fall back to what the compiler says */
@@ -54,6 +57,9 @@
#elif defined(__riscv) && __riscv_xlen == 64
#define bpf_target_riscv
#define bpf_target_defined
+#elif defined(__arc__)
+ #define bpf_target_arc
+ #define bpf_target_defined
#endif /* no compiler target */
#endif
@@ -233,6 +239,23 @@ struct pt_regs___arm64 {
/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
#define PT_REGS_SYSCALL_REGS(ctx) ctx
+#elif defined(bpf_target_arc)
+
+/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */
+#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
+#define __PT_PARM1_REG scratch.r0
+#define __PT_PARM2_REG scratch.r1
+#define __PT_PARM3_REG scratch.r2
+#define __PT_PARM4_REG scratch.r3
+#define __PT_PARM5_REG scratch.r4
+#define __PT_RET_REG scratch.blink
+#define __PT_FP_REG __unsupported__
+#define __PT_RC_REG scratch.r0
+#define __PT_SP_REG scratch.sp
+#define __PT_IP_REG scratch.ret
+/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */
+#define PT_REGS_SYSCALL_REGS(ctx) ctx
+
#endif
#if defined(bpf_target_defined)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 1383e26c5d1f..bb1e06eb1eca 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -2626,6 +2626,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
const struct btf_ext_info_sec *sinfo;
struct btf_ext_info *ext_info;
__u32 info_left, record_size;
+ size_t sec_cnt = 0;
/* The start of the info sec (including the __u32 record_size). */
void *info;
@@ -2689,8 +2690,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
return -EINVAL;
}
- total_record_size = sec_hdrlen +
- (__u64)num_records * record_size;
+ total_record_size = sec_hdrlen + (__u64)num_records * record_size;
if (info_left < total_record_size) {
pr_debug("%s section has incorrect num_records in .BTF.ext\n",
ext_sec->desc);
@@ -2699,12 +2699,14 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext,
info_left -= total_record_size;
sinfo = (void *)sinfo + total_record_size;
+ sec_cnt++;
}
ext_info = ext_sec->ext_info;
ext_info->len = ext_sec->len - sizeof(__u32);
ext_info->rec_size = record_size;
ext_info->info = info + sizeof(__u32);
+ ext_info->sec_cnt = sec_cnt;
return 0;
}
@@ -2788,6 +2790,9 @@ void btf_ext__free(struct btf_ext *btf_ext)
{
if (IS_ERR_OR_NULL(btf_ext))
return;
+ free(btf_ext->func_info.sec_idxs);
+ free(btf_ext->line_info.sec_idxs);
+ free(btf_ext->core_relo_info.sec_idxs);
free(btf_ext->data);
free(btf_ext);
}
@@ -2826,10 +2831,8 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)
if (err)
goto done;
- if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
- err = -EINVAL;
- goto done;
- }
+ if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+ goto done; /* skip core relos parsing */
err = btf_ext_setup_core_relos(btf_ext);
if (err)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 809fe209cdcc..73a5192defb3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -302,7 +302,7 @@ struct bpf_program {
void *priv;
bpf_program_clear_priv_t clear_priv;
- bool load;
+ bool autoload;
bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
@@ -483,6 +483,8 @@ struct elf_state {
int st_ops_shndx;
};
+struct usdt_manager;
+
struct bpf_object {
char name[BPF_OBJ_NAME_LEN];
char license[64];
@@ -545,6 +547,8 @@ struct bpf_object {
size_t fd_array_cap;
size_t fd_array_cnt;
+ struct usdt_manager *usdt_man;
+
char path[];
};
@@ -668,7 +672,18 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
prog->insns_cnt = prog->sec_insn_cnt;
prog->type = BPF_PROG_TYPE_UNSPEC;
- prog->load = true;
+
+ /* libbpf's convention for SEC("?abc...") is that it's just like
+ * SEC("abc...") but the corresponding bpf_program starts out with
+ * autoload set to false.
+ */
+ if (sec_name[0] == '?') {
+ prog->autoload = false;
+ /* from now on forget there was ? in section name */
+ sec_name++;
+ } else {
+ prog->autoload = true;
+ }
prog->instances.fds = NULL;
prog->instances.nr = -1;
@@ -1218,10 +1233,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
if (!obj->efile.elf)
return;
- if (obj->efile.elf) {
- elf_end(obj->efile.elf);
- obj->efile.elf = NULL;
- }
+ elf_end(obj->efile.elf);
+ obj->efile.elf = NULL;
obj->efile.symbols = NULL;
obj->efile.st_ops_data = NULL;
@@ -1397,8 +1410,11 @@ static int find_elf_var_offset(const struct bpf_object *obj, const char *name, _
for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
Elf64_Sym *sym = elf_sym_by_idx(obj, si);
- if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
- ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+ if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
+ continue;
+
+ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
continue;
sname = elf_sym_str(obj, sym->st_name);
@@ -2749,6 +2765,9 @@ static int bpf_object__init_btf(struct bpf_object *obj,
btf__set_pointer_size(obj->btf, 8);
}
if (btf_ext_data) {
+ struct btf_ext_info *ext_segs[3];
+ int seg_num, sec_num;
+
if (!obj->btf) {
pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
BTF_EXT_ELF_SEC, BTF_ELF_SEC);
@@ -2762,6 +2781,43 @@ static int bpf_object__init_btf(struct bpf_object *obj,
obj->btf_ext = NULL;
goto out;
}
+
+ /* setup .BTF.ext to ELF section mapping */
+ ext_segs[0] = &obj->btf_ext->func_info;
+ ext_segs[1] = &obj->btf_ext->line_info;
+ ext_segs[2] = &obj->btf_ext->core_relo_info;
+ for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
+ struct btf_ext_info *seg = ext_segs[seg_num];
+ const struct btf_ext_info_sec *sec;
+ const char *sec_name;
+ Elf_Scn *scn;
+
+ if (seg->sec_cnt == 0)
+ continue;
+
+ seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
+ if (!seg->sec_idxs) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ sec_num = 0;
+ for_each_btf_ext_sec(seg, sec) {
+ /* preventively increment index to avoid doing
+ * this before every continue below
+ */
+ sec_num++;
+
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (str_is_empty(sec_name))
+ continue;
+ scn = elf_sec_by_name(obj, sec_name);
+ if (!scn)
+ continue;
+
+ seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
+ }
+ }
}
out:
if (err && libbpf_needs_btf(obj)) {
@@ -2920,7 +2976,7 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
}
bpf_object__for_each_program(prog, obj) {
- if (!prog->load)
+ if (!prog->autoload)
continue;
if (prog_needs_vmlinux_btf(prog))
return true;
@@ -4587,7 +4643,7 @@ static int probe_kern_probe_read_kernel(void)
};
int fd, insn_cnt = ARRAY_SIZE(insns);
- fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+ fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
return probe_fd(fd);
}
@@ -4678,6 +4734,18 @@ static int probe_perf_link(void)
return link_fd < 0 && err == -EBADF;
}
+static int probe_kern_bpf_cookie(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
+ BPF_EXIT_INSN(),
+ };
+ int ret, insn_cnt = ARRAY_SIZE(insns);
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
+ return probe_fd(ret);
+}
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -4740,6 +4808,9 @@ static struct kern_feature_desc {
[FEAT_MEMCG_ACCOUNT] = {
"memcg-based memory accounting", probe_memcg_account,
},
+ [FEAT_BPF_COOKIE] = {
+ "BPF cookie support", probe_kern_bpf_cookie,
+ },
};
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -5555,6 +5626,22 @@ static int record_relo_core(struct bpf_program *prog,
return 0;
}
+static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
+{
+ struct reloc_desc *relo;
+ int i;
+
+ for (i = 0; i < prog->nr_reloc; i++) {
+ relo = &prog->reloc_desc[i];
+ if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
+ continue;
+
+ return relo->core_relo;
+ }
+
+ return NULL;
+}
+
static int bpf_core_resolve_relo(struct bpf_program *prog,
const struct bpf_core_relo *relo,
int relo_idx,
@@ -5611,7 +5698,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
struct bpf_program *prog;
struct bpf_insn *insn;
const char *sec_name;
- int i, err = 0, insn_idx, sec_idx;
+ int i, err = 0, insn_idx, sec_idx, sec_num;
if (obj->btf_ext->core_relo_info.len == 0)
return 0;
@@ -5632,32 +5719,18 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
}
seg = &obj->btf_ext->core_relo_info;
+ sec_num = 0;
for_each_btf_ext_sec(seg, sec) {
+ sec_idx = seg->sec_idxs[sec_num];
+ sec_num++;
+
sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
if (str_is_empty(sec_name)) {
err = -EINVAL;
goto out;
}
- /* bpf_object's ELF is gone by now so it's not easy to find
- * section index by section name, but we can find *any*
- * bpf_program within desired section name and use it's
- * prog->sec_idx to do a proper search by section index and
- * instruction offset
- */
- prog = NULL;
- for (i = 0; i < obj->nr_programs; i++) {
- prog = &obj->programs[i];
- if (strcmp(prog->sec_name, sec_name) == 0)
- break;
- }
- if (!prog) {
- pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
- return -ENOENT;
- }
- sec_idx = prog->sec_idx;
- pr_debug("sec '%s': found %d CO-RE relocations\n",
- sec_name, sec->num_info);
+ pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
for_each_btf_ext_rec(seg, sec, i, rec) {
if (rec->insn_off % BPF_INSN_SZ)
@@ -5665,15 +5738,22 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
insn_idx = rec->insn_off / BPF_INSN_SZ;
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
if (!prog) {
- pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
- sec_name, insn_idx, i);
- err = -EINVAL;
- goto out;
+ /* When __weak subprog is "overridden" by another instance
+ * of the subprog from a different object file, linker still
+ * appends all the .BTF.ext info that used to belong to that
+ * eliminated subprogram.
+ * This is similar to what x86-64 linker does for relocations.
+ * So just ignore such relocations just like we ignore
+ * subprog instructions when discovering subprograms.
+ */
+ pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
+ sec_name, i, insn_idx);
+ continue;
}
/* no need to apply CO-RE relocation if the program is
* not going to be loaded
*/
- if (!prog->load)
+ if (!prog->autoload)
continue;
/* adjust insn_idx from section frame of reference to the local
@@ -5685,16 +5765,16 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
return -EINVAL;
insn = &prog->insns[insn_idx];
- if (prog->obj->gen_loader) {
- err = record_relo_core(prog, rec, insn_idx);
- if (err) {
- pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
- prog->name, i, err);
- goto out;
- }
- continue;
+ err = record_relo_core(prog, rec, insn_idx);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
+ prog->name, i, err);
+ goto out;
}
+ if (prog->obj->gen_loader)
+ continue;
+
err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
if (err) {
pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
@@ -5834,14 +5914,13 @@ static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
void *rec, *rec_end, *new_prog_info;
const struct btf_ext_info_sec *sec;
size_t old_sz, new_sz;
- const char *sec_name;
- int i, off_adj;
+ int i, sec_num, sec_idx, off_adj;
+ sec_num = 0;
for_each_btf_ext_sec(ext_info, sec) {
- sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
- if (!sec_name)
- return -EINVAL;
- if (strcmp(sec_name, prog->sec_name) != 0)
+ sec_idx = ext_info->sec_idxs[sec_num];
+ sec_num++;
+ if (prog->sec_idx != sec_idx)
continue;
for_each_btf_ext_rec(ext_info, sec, i, rec) {
@@ -6236,7 +6315,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
if (err)
return err;
-
return 0;
}
@@ -6297,8 +6375,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
err);
return err;
}
- if (obj->gen_loader)
- bpf_object__sort_relos(obj);
+ bpf_object__sort_relos(obj);
}
/* Before relocating calls pre-process relocations and mark
@@ -6334,7 +6411,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
*/
if (prog_is_subprog(obj, prog))
continue;
- if (!prog->load)
+ if (!prog->autoload)
continue;
err = bpf_object__relocate_calls(obj, prog);
@@ -6349,7 +6426,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
prog = &obj->programs[i];
if (prog_is_subprog(obj, prog))
continue;
- if (!prog->load)
+ if (!prog->autoload)
continue;
err = bpf_object__relocate_data(obj, prog);
if (err) {
@@ -6358,8 +6435,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return err;
}
}
- if (!obj->gen_loader)
- bpf_object__free_relocs(obj);
+
return 0;
}
@@ -6636,6 +6712,8 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
return 0;
}
+static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
+
static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog,
struct bpf_insn *insns, int insns_cnt,
const char *license, __u32 kern_version,
@@ -6782,6 +6860,10 @@ retry_load:
goto retry_load;
ret = -errno;
+
+ /* post-process verifier log to improve error descriptions */
+ fixup_verifier_log(prog, log_buf, log_buf_size);
+
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
pr_perm_msg(ret);
@@ -6790,10 +6872,6 @@ retry_load:
pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
prog->name, log_buf);
}
- if (insns_cnt >= BPF_MAXINSNS) {
- pr_warn("prog '%s': program too large (%d insns), at most %d insns\n",
- prog->name, insns_cnt, BPF_MAXINSNS);
- }
out:
if (own_log_buf)
@@ -6801,6 +6879,128 @@ out:
return ret;
}
+static char *find_prev_line(char *buf, char *cur)
+{
+ char *p;
+
+ if (cur == buf) /* end of a log buf */
+ return NULL;
+
+ p = cur - 1;
+ while (p - 1 >= buf && *(p - 1) != '\n')
+ p--;
+
+ return p;
+}
+
+static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
+ char *orig, size_t orig_sz, const char *patch)
+{
+ /* size of the remaining log content to the right from the to-be-replaced part */
+ size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
+ size_t patch_sz = strlen(patch);
+
+ if (patch_sz != orig_sz) {
+ /* If patch line(s) are longer than original piece of verifier log,
+ * shift log contents by (patch_sz - orig_sz) bytes to the right
+ * starting from after to-be-replaced part of the log.
+ *
+ * If patch line(s) are shorter than original piece of verifier log,
+ * shift log contents by (orig_sz - patch_sz) bytes to the left
+ * starting from after to-be-replaced part of the log
+ *
+ * We need to be careful about not overflowing available
+ * buf_sz capacity. If that's the case, we'll truncate the end
+ * of the original log, as necessary.
+ */
+ if (patch_sz > orig_sz) {
+ if (orig + patch_sz >= buf + buf_sz) {
+ /* patch is big enough to cover remaining space completely */
+ patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
+ rem_sz = 0;
+ } else if (patch_sz - orig_sz > buf_sz - log_sz) {
+ /* patch causes part of remaining log to be truncated */
+ rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
+ }
+ }
+ /* shift remaining log to the right by calculated amount */
+ memmove(orig + patch_sz, orig + orig_sz, rem_sz);
+ }
+
+ memcpy(orig, patch, patch_sz);
+}
+
+static void fixup_log_failed_core_relo(struct bpf_program *prog,
+ char *buf, size_t buf_sz, size_t log_sz,
+ char *line1, char *line2, char *line3)
+{
+ /* Expected log for failed and not properly guarded CO-RE relocation:
+ * line1 -> 123: (85) call unknown#195896080
+ * line2 -> invalid func unknown#195896080
+ * line3 -> <anything else or end of buffer>
+ *
+ * "123" is the index of the instruction that was poisoned. We extract
+ * instruction index to find corresponding CO-RE relocation and
+ * replace this part of the log with more relevant information about
+ * failed CO-RE relocation.
+ */
+ const struct bpf_core_relo *relo;
+ struct bpf_core_spec spec;
+ char patch[512], spec_buf[256];
+ int insn_idx, err;
+
+ if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
+ return;
+
+ relo = find_relo_core(prog, insn_idx);
+ if (!relo)
+ return;
+
+ err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
+ if (err)
+ return;
+
+ bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
+ snprintf(patch, sizeof(patch),
+ "%d: <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation %s\n",
+ insn_idx, spec_buf);
+
+ patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
+}
+
+static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
+{
+ /* look for familiar error patterns in last N lines of the log */
+ const size_t max_last_line_cnt = 10;
+ char *prev_line, *cur_line, *next_line;
+ size_t log_sz;
+ int i;
+
+ if (!buf)
+ return;
+
+ log_sz = strlen(buf) + 1;
+ next_line = buf + log_sz - 1;
+
+ for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
+ cur_line = find_prev_line(buf, next_line);
+ if (!cur_line)
+ return;
+
+ /* failed CO-RE relocation case */
+ if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
+ prev_line = find_prev_line(buf, cur_line);
+ if (!prev_line)
+ continue;
+
+ fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
+ prev_line, cur_line, next_line);
+ return;
+ }
+ }
+}
+
static int bpf_program_record_relos(struct bpf_program *prog)
{
struct bpf_object *obj = prog->obj;
@@ -6946,7 +7146,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
prog = &obj->programs[i];
if (prog_is_subprog(obj, prog))
continue;
- if (!prog->load) {
+ if (!prog->autoload) {
pr_debug("prog '%s': skipped loading\n", prog->name);
continue;
}
@@ -6955,8 +7155,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
if (err)
return err;
}
- if (obj->gen_loader)
- bpf_object__free_relocs(obj);
+
+ bpf_object__free_relocs(obj);
return 0;
}
@@ -6976,8 +7176,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
continue;
}
- bpf_program__set_type(prog, prog->sec_def->prog_type);
- bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type);
+ prog->type = prog->sec_def->prog_type;
+ prog->expected_attach_type = prog->sec_def->expected_attach_type;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -8200,6 +8400,9 @@ void bpf_object__close(struct bpf_object *obj)
if (obj->clear_priv)
obj->clear_priv(obj, obj->priv);
+ usdt_manager_free(obj->usdt_man);
+ obj->usdt_man = NULL;
+
bpf_gen__free(obj->gen_loader);
bpf_object__elf_finish(obj);
bpf_object_unload(obj);
@@ -8423,7 +8626,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
bool bpf_program__autoload(const struct bpf_program *prog)
{
- return prog->load;
+ return prog->autoload;
}
int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
@@ -8431,7 +8634,7 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
if (prog->obj->loaded)
return libbpf_err(-EINVAL);
- prog->load = autoload;
+ prog->autoload = autoload;
return 0;
}
@@ -8519,9 +8722,13 @@ enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
return prog->type;
}
-void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
+int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
+
prog->type = type;
+ return 0;
}
static bool bpf_program__is_type(const struct bpf_program *prog,
@@ -8535,8 +8742,7 @@ int bpf_program__set_##NAME(struct bpf_program *prog) \
{ \
if (!prog) \
return libbpf_err(-EINVAL); \
- bpf_program__set_type(prog, TYPE); \
- return 0; \
+ return bpf_program__set_type(prog, TYPE); \
} \
\
bool bpf_program__is_##NAME(const struct bpf_program *prog) \
@@ -8566,10 +8772,14 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program
return prog->expected_attach_type;
}
-void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+int bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type)
{
+ if (prog->obj->loaded)
+ return libbpf_err(-EBUSY);
+
prog->expected_attach_type = type;
+ return 0;
}
__u32 bpf_program__flags(const struct bpf_program *prog)
@@ -8630,6 +8840,8 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log
}
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
@@ -8642,11 +8854,12 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
SEC_DEF("kprobe/", KPROBE, 0, SEC_NONE, attach_kprobe),
- SEC_DEF("uprobe/", KPROBE, 0, SEC_NONE),
+ SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
SEC_DEF("kretprobe/", KPROBE, 0, SEC_NONE, attach_kprobe),
- SEC_DEF("uretprobe/", KPROBE, 0, SEC_NONE),
+ SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
SEC_DEF("kprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
SEC_DEF("kretprobe.multi/", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
+ SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt),
SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE),
SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED),
SEC_DEF("action", SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
@@ -9636,9 +9849,8 @@ static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
* bpf_object__open guessed
*/
if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
- bpf_program__set_type(prog, attr->prog_type);
- bpf_program__set_expected_attach_type(prog,
- attach_type);
+ prog->type = attr->prog_type;
+ prog->expected_attach_type = attach_type;
}
if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) {
/*
@@ -9692,14 +9904,6 @@ int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
return bpf_prog_load_xattr2(&attr, pobj, prog_fd);
}
-struct bpf_link {
- int (*detach)(struct bpf_link *link);
- void (*dealloc)(struct bpf_link *link);
- char *pin_path; /* NULL, if not pinned */
- int fd; /* hook FD, -1 if not applicable */
- bool disconnected;
-};
-
/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
@@ -10517,6 +10721,273 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
return pfd;
}
+/* uprobes deal in relative offsets; subtract the base address associated with
+ * the mapped binary. See Documentation/trace/uprobetracer.rst for more
+ * details.
+ */
+static long elf_find_relative_offset(const char *filename, Elf *elf, long addr)
+{
+ size_t n;
+ int i;
+
+ if (elf_getphdrnum(elf, &n)) {
+ pr_warn("elf: failed to find program headers for '%s': %s\n", filename,
+ elf_errmsg(-1));
+ return -ENOENT;
+ }
+
+ for (i = 0; i < n; i++) {
+ int seg_start, seg_end, seg_offset;
+ GElf_Phdr phdr;
+
+ if (!gelf_getphdr(elf, i, &phdr)) {
+ pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename,
+ elf_errmsg(-1));
+ return -ENOENT;
+ }
+ if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X))
+ continue;
+
+ seg_start = phdr.p_vaddr;
+ seg_end = seg_start + phdr.p_memsz;
+ seg_offset = phdr.p_offset;
+ if (addr >= seg_start && addr < seg_end)
+ return addr - seg_start + seg_offset;
+ }
+ pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename);
+ return -ENOENT;
+}
+
+/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
+static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
+{
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ GElf_Shdr sh;
+
+ if (!gelf_getshdr(scn, &sh))
+ continue;
+ if (sh.sh_type == sh_type)
+ return scn;
+ }
+ return NULL;
+}
+
+/* Find offset of function name in object specified by path. "name" matches
+ * symbol name or name@@LIB for library functions.
+ */
+static long elf_find_func_offset(const char *binary_path, const char *name)
+{
+ int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
+ bool is_shared_lib, is_name_qualified;
+ char errmsg[STRERR_BUFSIZE];
+ long ret = -ENOENT;
+ size_t name_len;
+ GElf_Ehdr ehdr;
+ Elf *elf;
+
+ fd = open(binary_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ ret = -errno;
+ pr_warn("failed to open %s: %s\n", binary_path,
+ libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
+ return ret;
+ }
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
+ close(fd);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (!gelf_getehdr(elf, &ehdr)) {
+ pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+ /* for shared lib case, we do not need to calculate relative offset */
+ is_shared_lib = ehdr.e_type == ET_DYN;
+
+ name_len = strlen(name);
+ /* Does name specify "@@LIB"? */
+ is_name_qualified = strstr(name, "@@") != NULL;
+
+ /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
+ * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
+ * linked binary may not have SHT_DYMSYM, so absence of a section should not be
+ * reported as a warning/error.
+ */
+ for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
+ size_t nr_syms, strtabidx, idx;
+ Elf_Data *symbols = NULL;
+ Elf_Scn *scn = NULL;
+ int last_bind = -1;
+ const char *sname;
+ GElf_Shdr sh;
+
+ scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
+ if (!scn) {
+ pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
+ binary_path);
+ continue;
+ }
+ if (!gelf_getshdr(scn, &sh))
+ continue;
+ strtabidx = sh.sh_link;
+ symbols = elf_getdata(scn, 0);
+ if (!symbols) {
+ pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
+ binary_path, elf_errmsg(-1));
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+ nr_syms = symbols->d_size / sh.sh_entsize;
+
+ for (idx = 0; idx < nr_syms; idx++) {
+ int curr_bind;
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, idx, &sym))
+ continue;
+
+ if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+ continue;
+
+ sname = elf_strptr(elf, strtabidx, sym.st_name);
+ if (!sname)
+ continue;
+
+ curr_bind = GELF_ST_BIND(sym.st_info);
+
+ /* User can specify func, func@@LIB or func@@LIB_VERSION. */
+ if (strncmp(sname, name, name_len) != 0)
+ continue;
+ /* ...but we don't want a search for "foo" to match 'foo2" also, so any
+ * additional characters in sname should be of the form "@@LIB".
+ */
+ if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
+ continue;
+
+ if (ret >= 0) {
+ /* handle multiple matches */
+ if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
+ /* Only accept one non-weak bind. */
+ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
+ sname, name, binary_path);
+ ret = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ } else if (curr_bind == STB_WEAK) {
+ /* already have a non-weak bind, and
+ * this is a weak bind, so ignore.
+ */
+ continue;
+ }
+ }
+ ret = sym.st_value;
+ last_bind = curr_bind;
+ }
+ /* For binaries that are not shared libraries, we need relative offset */
+ if (ret > 0 && !is_shared_lib)
+ ret = elf_find_relative_offset(binary_path, elf, ret);
+ if (ret > 0)
+ break;
+ }
+
+ if (ret > 0) {
+ pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
+ ret);
+ } else {
+ if (ret == 0) {
+ pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
+ is_shared_lib ? "should not be 0 in a shared library" :
+ "try using shared library path instead");
+ ret = -ENOENT;
+ } else {
+ pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
+ }
+ }
+out:
+ elf_end(elf);
+ close(fd);
+ return ret;
+}
+
+static const char *arch_specific_lib_paths(void)
+{
+ /*
+ * Based on https://packages.debian.org/sid/libc6.
+ *
+ * Assume that the traced program is built for the same architecture
+ * as libbpf, which should cover the vast majority of cases.
+ */
+#if defined(__x86_64__)
+ return "/lib/x86_64-linux-gnu";
+#elif defined(__i386__)
+ return "/lib/i386-linux-gnu";
+#elif defined(__s390x__)
+ return "/lib/s390x-linux-gnu";
+#elif defined(__s390__)
+ return "/lib/s390-linux-gnu";
+#elif defined(__arm__) && defined(__SOFTFP__)
+ return "/lib/arm-linux-gnueabi";
+#elif defined(__arm__) && !defined(__SOFTFP__)
+ return "/lib/arm-linux-gnueabihf";
+#elif defined(__aarch64__)
+ return "/lib/aarch64-linux-gnu";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
+ return "/lib/mips64el-linux-gnuabi64";
+#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
+ return "/lib/mipsel-linux-gnu";
+#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ return "/lib/powerpc64le-linux-gnu";
+#elif defined(__sparc__) && defined(__arch64__)
+ return "/lib/sparc64-linux-gnu";
+#elif defined(__riscv) && __riscv_xlen == 64
+ return "/lib/riscv64-linux-gnu";
+#else
+ return NULL;
+#endif
+}
+
+/* Get full path to program/shared library. */
+static int resolve_full_path(const char *file, char *result, size_t result_sz)
+{
+ const char *search_paths[3] = {};
+ int i;
+
+ if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
+ search_paths[0] = getenv("LD_LIBRARY_PATH");
+ search_paths[1] = "/usr/lib64:/usr/lib";
+ search_paths[2] = arch_specific_lib_paths();
+ } else {
+ search_paths[0] = getenv("PATH");
+ search_paths[1] = "/usr/bin:/usr/sbin";
+ }
+
+ for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
+ const char *s;
+
+ if (!search_paths[i])
+ continue;
+ for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
+ char *next_path;
+ int seg_len;
+
+ if (s[0] == ':')
+ s++;
+ next_path = strchr(s, ':');
+ seg_len = next_path ? next_path - s : strlen(s);
+ if (!seg_len)
+ continue;
+ snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
+ /* ensure it is an executable file/link */
+ if (access(result, R_OK | X_OK) < 0)
+ continue;
+ pr_debug("resolved '%s' to '%s'\n", file, result);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
@@ -10524,10 +10995,12 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
{
DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
+ char full_binary_path[PATH_MAX];
struct bpf_link *link;
size_t ref_ctr_off;
int pfd, err;
bool retprobe, legacy;
+ const char *func_name;
if (!OPTS_VALID(opts, bpf_uprobe_opts))
return libbpf_err_ptr(-EINVAL);
@@ -10536,12 +11009,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+ if (binary_path && !strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, full_binary_path,
+ sizeof(full_binary_path));
+ if (err) {
+ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+ prog->name, binary_path, err);
+ return libbpf_err_ptr(err);
+ }
+ binary_path = full_binary_path;
+ }
+ func_name = OPTS_GET(opts, func_name, NULL);
+ if (func_name) {
+ long sym_off;
+
+ if (!binary_path) {
+ pr_warn("prog '%s': name-based attach requires binary_path\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+ sym_off = elf_find_func_offset(binary_path, func_name);
+ if (sym_off < 0)
+ return libbpf_err_ptr(sym_off);
+ func_offset += sym_off;
+ }
+
legacy = determine_uprobe_perf_type() < 0;
if (!legacy) {
pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
func_offset, pid, ref_ctr_off);
} else {
- char probe_name[512];
+ char probe_name[PATH_MAX + 64];
if (ref_ctr_off)
return libbpf_err_ptr(-EINVAL);
@@ -10589,6 +11087,60 @@ err_out:
}
+/* Format of u[ret]probe section definition supporting auto-attach:
+ * u[ret]probe/binary:function[+offset]
+ *
+ * binary can be an absolute/relative path or a filename; the latter is resolved to a
+ * full binary path via bpf_program__attach_uprobe_opts.
+ *
+ * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
+ * specified (and auto-attach is not possible) or the above format is specified for
+ * auto-attach.
+ */
+static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
+ int n, ret = -EINVAL;
+ long offset = 0;
+
+ *link = NULL;
+
+ n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
+ &probe_type, &binary_path, &func_name, &offset);
+ switch (n) {
+ case 1:
+ /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+ ret = 0;
+ break;
+ case 2:
+ pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
+ prog->name, prog->sec_name);
+ break;
+ case 3:
+ case 4:
+ opts.retprobe = strcmp(probe_type, "uretprobe") == 0;
+ if (opts.retprobe && offset != 0) {
+ pr_warn("prog '%s': uretprobes do not support offset specification\n",
+ prog->name);
+ break;
+ }
+ opts.func_name = func_name;
+ *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
+ ret = libbpf_get_error(*link);
+ break;
+ default:
+ pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
+ prog->sec_name);
+ break;
+ }
+ free(probe_type);
+ free(binary_path);
+ free(func_name);
+
+ return ret;
+}
+
struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
bool retprobe, pid_t pid,
const char *binary_path,
@@ -10599,6 +11151,85 @@ struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
}
+struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
+ pid_t pid, const char *binary_path,
+ const char *usdt_provider, const char *usdt_name,
+ const struct bpf_usdt_opts *opts)
+{
+ char resolved_path[512];
+ struct bpf_object *obj = prog->obj;
+ struct bpf_link *link;
+ __u64 usdt_cookie;
+ int err;
+
+ if (!OPTS_VALID(opts, bpf_uprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ if (bpf_program__fd(prog) < 0) {
+ pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ prog->name);
+ return libbpf_err_ptr(-EINVAL);
+ }
+
+ if (!strchr(binary_path, '/')) {
+ err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
+ if (err) {
+ pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
+ prog->name, binary_path, err);
+ return libbpf_err_ptr(err);
+ }
+ binary_path = resolved_path;
+ }
+
+ /* USDT manager is instantiated lazily on first USDT attach. It will
+ * be destroyed together with BPF object in bpf_object__close().
+ */
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ if (!obj->usdt_man) {
+ obj->usdt_man = usdt_manager_new(obj);
+ if (IS_ERR(obj->usdt_man))
+ return libbpf_ptr(obj->usdt_man);
+ }
+
+ usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
+ link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
+ usdt_provider, usdt_name, usdt_cookie);
+ err = libbpf_get_error(link);
+ if (err)
+ return libbpf_err_ptr(err);
+ return link;
+}
+
+static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+ char *path = NULL, *provider = NULL, *name = NULL;
+ const char *sec_name;
+ int n, err;
+
+ sec_name = bpf_program__section_name(prog);
+ if (strcmp(sec_name, "usdt") == 0) {
+ /* no auto-attach for just SEC("usdt") */
+ *link = NULL;
+ return 0;
+ }
+
+ n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
+ if (n != 3) {
+ pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
+ sec_name);
+ err = -EINVAL;
+ } else {
+ *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
+ provider, name, NULL);
+ err = libbpf_get_error(*link);
+ }
+ free(path);
+ free(provider);
+ free(name);
+ return err;
+}
+
static int determine_tracepoint_id(const char *tp_category,
const char *tp_name)
{
@@ -10791,7 +11422,8 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
return libbpf_err_ptr(-ENOMEM);
link->detach = &bpf_link__detach_fd;
- pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+ /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
+ pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), NULL);
if (pfd < 0) {
pfd = -errno;
free(link);
@@ -10800,7 +11432,7 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro
return libbpf_err_ptr(pfd);
}
link->fd = pfd;
- return (struct bpf_link *)link;
+ return link;
}
struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
@@ -12211,7 +12843,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
struct bpf_program *prog = *s->progs[i].prog;
struct bpf_link **link = s->progs[i].link;
- if (!prog->load)
+ if (!prog->autoload)
continue;
/* auto-attaching not supported for this program */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 05dde85e19a6..cdbfee60ea3e 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -378,7 +378,31 @@ struct bpf_link;
LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+/**
+ * @brief **bpf_link__pin()** pins the BPF link to a file
+ * in the BPF FS specified by a path. This increments the links
+ * reference count, allowing it to stay loaded after the process
+ * which loaded it has exited.
+ *
+ * @param link BPF link to pin, must already be loaded
+ * @param path file path in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
+
LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+
+/**
+ * @brief **bpf_link__unpin()** unpins the BPF link from a file
+ * in the BPFFS specified by a path. This decrements the links
+ * reference count.
+ *
+ * The file pinning the BPF link can also be unlinked by a different
+ * process in which case this function will return an error.
+ *
+ * @param prog BPF program to unpin
+ * @param path file path to the pin in a BPF file system
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
struct bpf_program *prog);
@@ -386,6 +410,22 @@ LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__detach(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
+/**
+ * @brief **bpf_program__attach()** is a generic function for attaching
+ * a BPF program based on auto-detection of program type, attach type,
+ * and extra paremeters, where applicable.
+ *
+ * @param prog BPF program to attach
+ * @return Reference to the newly created BPF link; or NULL is returned on error,
+ * error code is stored in errno
+ *
+ * This is supported for:
+ * - kprobe/kretprobe (depends on SEC() definition)
+ * - uprobe/uretprobe (depends on SEC() definition)
+ * - tracepoint
+ * - raw tracepoint
+ * - tracing programs (typed raw TP/fentry/fexit/fmod_ret)
+ */
LIBBPF_API struct bpf_link *
bpf_program__attach(const struct bpf_program *prog);
@@ -459,9 +499,17 @@ struct bpf_uprobe_opts {
__u64 bpf_cookie;
/* uprobe is return probe, invoked at function return time */
bool retprobe;
+ /* Function name to attach to. Could be an unqualified ("abc") or library-qualified
+ * "abc@LIBXYZ" name. To specify function entry, func_name should be set while
+ * func_offset argument to bpf_prog__attach_uprobe_opts() should be 0. To trace an
+ * offset within a function, specify func_name and use func_offset argument to specify
+ * offset within the function. Shared library functions must specify the shared library
+ * binary_path.
+ */
+ const char *func_name;
size_t :0;
};
-#define bpf_uprobe_opts__last_field retprobe
+#define bpf_uprobe_opts__last_field func_name
/**
* @brief **bpf_program__attach_uprobe()** attaches a BPF program
@@ -503,6 +551,37 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
const char *binary_path, size_t func_offset,
const struct bpf_uprobe_opts *opts);
+struct bpf_usdt_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* custom user-provided value accessible through usdt_cookie() */
+ __u64 usdt_cookie;
+ size_t :0;
+};
+#define bpf_usdt_opts__last_field usdt_cookie
+
+/**
+ * @brief **bpf_program__attach_usdt()** is just like
+ * bpf_program__attach_uprobe_opts() except it covers USDT (User-space
+ * Statically Defined Tracepoint) attachment, instead of attaching to
+ * user-space function entry or exit.
+ *
+ * @param prog BPF program to attach
+ * @param pid Process ID to attach the uprobe to, 0 for self (own process),
+ * -1 for all processes
+ * @param binary_path Path to binary that contains provided USDT probe
+ * @param usdt_provider USDT provider name
+ * @param usdt_name USDT probe name
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link; or NULL is returned on error,
+ * error code is stored in errno
+ */
+LIBBPF_API struct bpf_link *
+bpf_program__attach_usdt(const struct bpf_program *prog,
+ pid_t pid, const char *binary_path,
+ const char *usdt_provider, const char *usdt_name,
+ const struct bpf_usdt_opts *opts);
+
struct bpf_tracepoint_opts {
/* size of this struct, for forward/backward compatiblity */
size_t sz;
@@ -647,12 +726,37 @@ LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
-LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
- enum bpf_prog_type type);
+
+/**
+ * @brief **bpf_program__set_type()** sets the program
+ * type of the passed BPF program.
+ * @param prog BPF program to set the program type for
+ * @param type program type to set the BPF map to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
+ *
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
+ */
+LIBBPF_API int bpf_program__set_type(struct bpf_program *prog,
+ enum bpf_prog_type type);
LIBBPF_API enum bpf_attach_type
bpf_program__expected_attach_type(const struct bpf_program *prog);
-LIBBPF_API void
+
+/**
+ * @brief **bpf_program__set_expected_attach_type()** sets the
+ * attach type of the passed BPF program. This is used for
+ * auto-detection of attachment when programs are loaded.
+ * @param prog BPF program to set the attach type for
+ * @param type attach type to set the BPF map to have
+ * @return error code; or 0 if no error. An error occurs
+ * if the object is already loaded.
+ *
+ * This must be called before the BPF object is loaded,
+ * otherwise it has no effect and an error is returned.
+ */
+LIBBPF_API int
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
@@ -668,6 +772,17 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le
LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
+/**
+ * @brief **bpf_program__set_attach_target()** sets BTF-based attach target
+ * for supported BPF program types:
+ * - BTF-aware raw tracepoints (tp_btf);
+ * - fentry/fexit/fmod_ret;
+ * - lsm;
+ * - freplace.
+ * @param prog BPF program to set the attach type for
+ * @param type attach type to set the BPF map to have
+ * @return error code; or 0 if no error occurred.
+ */
LIBBPF_API int
bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
const char *attach_func_name);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index dd35ee58bfaa..82f6d62176dd 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -444,6 +444,7 @@ LIBBPF_0.8.0 {
global:
bpf_object__destroy_subskeleton;
bpf_object__open_subskeleton;
+ bpf_program__attach_usdt;
libbpf_register_prog_handler;
libbpf_unregister_prog_handler;
bpf_program__attach_kprobe_multi_opts;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index b6247dc7f8eb..4abdbe2fea9d 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -103,6 +103,17 @@
#define str_has_pfx(str, pfx) \
(strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
+/* suffix check */
+static inline bool str_has_sfx(const char *str, const char *sfx)
+{
+ size_t str_len = strlen(str);
+ size_t sfx_len = strlen(sfx);
+
+ if (sfx_len <= str_len)
+ return strcmp(str + str_len - sfx_len, sfx);
+ return false;
+}
+
/* Symbol versioning is different between static and shared library.
* Properly versioned symbols are needed for shared library, but
* only the symbol of the new version is needed for static library.
@@ -148,6 +159,15 @@ do { \
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
+
+struct bpf_link {
+ int (*detach)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
+ char *pin_path; /* NULL, if not pinned */
+ int fd; /* hook FD, -1 if not applicable */
+ bool disconnected;
+};
+
/*
* Re-implement glibc's reallocarray() for libbpf internal-only use.
* reallocarray(), unfortunately, is not available in all versions of glibc,
@@ -329,6 +349,8 @@ enum kern_feature_id {
FEAT_BTF_TYPE_TAG,
/* memcg-based accounting for BPF maps and progs */
FEAT_MEMCG_ACCOUNT,
+ /* BPF cookie (bpf_get_attach_cookie() BPF helper) support */
+ FEAT_BPF_COOKIE,
__FEAT_CNT,
};
@@ -354,6 +376,13 @@ struct btf_ext_info {
void *info;
__u32 rec_size;
__u32 len;
+ /* optional (maintained internally by libbpf) mapping between .BTF.ext
+ * section and corresponding ELF section. This is used to join
+ * information like CO-RE relocation records with corresponding BPF
+ * programs defined in ELF sections
+ */
+ __u32 *sec_idxs;
+ int sec_cnt;
};
#define for_each_btf_ext_sec(seg, sec) \
@@ -543,4 +572,12 @@ int bpf_core_add_cands(struct bpf_core_cand *local_cand,
struct bpf_core_cand_list *cands);
void bpf_core_free_cands(struct bpf_core_cand_list *cands);
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj);
+void usdt_manager_free(struct usdt_manager *man);
+struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man,
+ const struct bpf_program *prog,
+ pid_t pid, const char *path,
+ const char *usdt_provider, const char *usdt_name,
+ __u64 usdt_cookie);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index f946f23eab20..ba4453dfd1ed 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -178,29 +178,28 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
* Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
* string to specify enumerator's value index that need to be relocated.
*/
-static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
- __u32 type_id,
- const char *spec_str,
- enum bpf_core_relo_kind relo_kind,
- struct bpf_core_spec *spec)
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+ const struct bpf_core_relo *relo,
+ struct bpf_core_spec *spec)
{
int access_idx, parsed_len, i;
struct bpf_core_accessor *acc;
const struct btf_type *t;
- const char *name;
+ const char *name, *spec_str;
__u32 id;
__s64 sz;
+ spec_str = btf__name_by_offset(btf, relo->access_str_off);
if (str_is_empty(spec_str) || *spec_str == ':')
return -EINVAL;
memset(spec, 0, sizeof(*spec));
spec->btf = btf;
- spec->root_type_id = type_id;
- spec->relo_kind = relo_kind;
+ spec->root_type_id = relo->type_id;
+ spec->relo_kind = relo->kind;
/* type-based relocations don't have a field access string */
- if (core_relo_is_type_based(relo_kind)) {
+ if (core_relo_is_type_based(relo->kind)) {
if (strcmp(spec_str, "0"))
return -EINVAL;
return 0;
@@ -221,7 +220,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
if (spec->raw_len == 0)
return -EINVAL;
- t = skip_mods_and_typedefs(btf, type_id, &id);
+ t = skip_mods_and_typedefs(btf, relo->type_id, &id);
if (!t)
return -EINVAL;
@@ -231,7 +230,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
acc->idx = access_idx;
spec->len++;
- if (core_relo_is_enumval_based(relo_kind)) {
+ if (core_relo_is_enumval_based(relo->kind)) {
if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
return -EINVAL;
@@ -240,7 +239,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
return 0;
}
- if (!core_relo_is_field_based(relo_kind))
+ if (!core_relo_is_field_based(relo->kind))
return -EINVAL;
sz = btf__resolve_size(btf, id);
@@ -301,7 +300,7 @@ static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
spec->bit_offset += access_idx * sz * 8;
} else {
pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
- prog_name, type_id, spec_str, i, id, btf_kind_str(t));
+ prog_name, relo->type_id, spec_str, i, id, btf_kind_str(t));
return -EINVAL;
}
}
@@ -1055,51 +1054,66 @@ poison:
* [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
* where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
*/
-static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec)
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec)
{
const struct btf_type *t;
const struct btf_enum *e;
const char *s;
__u32 type_id;
- int i;
+ int i, len = 0;
+
+#define append_buf(fmt, args...) \
+ ({ \
+ int r; \
+ r = snprintf(buf, buf_sz, fmt, ##args); \
+ len += r; \
+ if (r >= buf_sz) \
+ r = buf_sz; \
+ buf += r; \
+ buf_sz -= r; \
+ })
type_id = spec->root_type_id;
t = btf_type_by_id(spec->btf, type_id);
s = btf__name_by_offset(spec->btf, t->name_off);
- libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+ append_buf("<%s> [%u] %s %s",
+ core_relo_kind_str(spec->relo_kind),
+ type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
if (core_relo_is_type_based(spec->relo_kind))
- return;
+ return len;
if (core_relo_is_enumval_based(spec->relo_kind)) {
t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
e = btf_enum(t) + spec->raw_spec[0];
s = btf__name_by_offset(spec->btf, e->name_off);
- libbpf_print(level, "::%s = %u", s, e->val);
- return;
+ append_buf("::%s = %u", s, e->val);
+ return len;
}
if (core_relo_is_field_based(spec->relo_kind)) {
for (i = 0; i < spec->len; i++) {
if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
+ append_buf(".%s", spec->spec[i].name);
else if (i > 0 || spec->spec[i].idx > 0)
- libbpf_print(level, "[%u]", spec->spec[i].idx);
+ append_buf("[%u]", spec->spec[i].idx);
}
- libbpf_print(level, " (");
+ append_buf(" (");
for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+ append_buf("%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
if (spec->bit_offset % 8)
- libbpf_print(level, " @ offset %u.%u)",
- spec->bit_offset / 8, spec->bit_offset % 8);
+ append_buf(" @ offset %u.%u)", spec->bit_offset / 8, spec->bit_offset % 8);
else
- libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
- return;
+ append_buf(" @ offset %u)", spec->bit_offset / 8);
+ return len;
}
+
+ return len;
+#undef append_buf
}
/*
@@ -1167,7 +1181,7 @@ int bpf_core_calc_relo_insn(const char *prog_name,
const struct btf_type *local_type;
const char *local_name;
__u32 local_id;
- const char *spec_str;
+ char spec_buf[256];
int i, j, err;
local_id = relo->type_id;
@@ -1176,24 +1190,20 @@ int bpf_core_calc_relo_insn(const char *prog_name,
if (!local_name)
return -EINVAL;
- spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
- if (str_is_empty(spec_str))
- return -EINVAL;
-
- err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str,
- relo->kind, local_spec);
+ err = bpf_core_parse_spec(prog_name, local_btf, relo, local_spec);
if (err) {
+ const char *spec_str;
+
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
prog_name, relo_idx, local_id, btf_kind_str(local_type),
str_is_empty(local_name) ? "<anon>" : local_name,
- spec_str, err);
+ spec_str ?: "<?>", err);
return -EINVAL;
}
- pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
- relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
+ bpf_core_format_spec(spec_buf, sizeof(spec_buf), local_spec);
+ pr_debug("prog '%s': relo #%d: %s\n", prog_name, relo_idx, spec_buf);
/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) {
@@ -1207,7 +1217,7 @@ int bpf_core_calc_relo_insn(const char *prog_name,
}
/* libbpf doesn't support candidate search for anonymous types */
- if (str_is_empty(spec_str)) {
+ if (str_is_empty(local_name)) {
pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
return -EOPNOTSUPP;
@@ -1217,17 +1227,15 @@ int bpf_core_calc_relo_insn(const char *prog_name,
err = bpf_core_spec_match(local_spec, cands->cands[i].btf,
cands->cands[i].id, cand_spec);
if (err < 0) {
- pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
- prog_name, relo_idx, i);
- bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec);
- libbpf_print(LIBBPF_WARN, ": %d\n", err);
+ bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ",
+ prog_name, relo_idx, i, spec_buf, err);
return err;
}
- pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
- relo_idx, err == 0 ? "non-matching" : "matching", i);
- bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
+ bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec);
+ pr_debug("prog '%s': relo #%d: %s candidate #%d %s\n", prog_name,
+ relo_idx, err == 0 ? "non-matching" : "matching", i, spec_buf);
if (err == 0)
continue;
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
index a28bf3711ce2..073039d8ca4f 100644
--- a/tools/lib/bpf/relo_core.h
+++ b/tools/lib/bpf/relo_core.h
@@ -84,4 +84,10 @@ int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
int insn_idx, const struct bpf_core_relo *relo,
int relo_idx, const struct bpf_core_relo_res *res);
+int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
+ const struct bpf_core_relo *relo,
+ struct bpf_core_spec *spec);
+
+int bpf_core_format_spec(char *buf, size_t buf_sz, const struct bpf_core_spec *spec);
+
#endif
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
new file mode 100644
index 000000000000..4181fddb3687
--- /dev/null
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#ifndef __USDT_BPF_H__
+#define __USDT_BPF_H__
+
+#include <linux/errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* Below types and maps are internal implementation details of libbpf's USDT
+ * support and are subjects to change. Also, bpf_usdt_xxx() API helpers should
+ * be considered an unstable API as well and might be adjusted based on user
+ * feedback from using libbpf's USDT support in production.
+ */
+
+/* User can override BPF_USDT_MAX_SPEC_CNT to change default size of internal
+ * map that keeps track of USDT argument specifications. This might be
+ * necessary if there are a lot of USDT attachments.
+ */
+#ifndef BPF_USDT_MAX_SPEC_CNT
+#define BPF_USDT_MAX_SPEC_CNT 256
+#endif
+/* User can override BPF_USDT_MAX_IP_CNT to change default size of internal
+ * map that keeps track of IP (memory address) mapping to USDT argument
+ * specification.
+ * Note, if kernel supports BPF cookies, this map is not used and could be
+ * resized all the way to 1 to save a bit of memory.
+ */
+#ifndef BPF_USDT_MAX_IP_CNT
+#define BPF_USDT_MAX_IP_CNT (4 * BPF_USDT_MAX_SPEC_CNT)
+#endif
+/* We use BPF CO-RE to detect support for BPF cookie from BPF side. This is
+ * the only dependency on CO-RE, so if it's undesirable, user can override
+ * BPF_USDT_HAS_BPF_COOKIE to specify whether to BPF cookie is supported or not.
+ */
+#ifndef BPF_USDT_HAS_BPF_COOKIE
+#define BPF_USDT_HAS_BPF_COOKIE \
+ bpf_core_enum_value_exists(enum bpf_func_id___usdt, BPF_FUNC_get_attach_cookie___usdt)
+#endif
+
+enum __bpf_usdt_arg_type {
+ BPF_USDT_ARG_CONST,
+ BPF_USDT_ARG_REG,
+ BPF_USDT_ARG_REG_DEREF,
+};
+
+struct __bpf_usdt_arg_spec {
+ /* u64 scalar interpreted depending on arg_type, see below */
+ __u64 val_off;
+ /* arg location case, see bpf_udst_arg() for details */
+ enum __bpf_usdt_arg_type arg_type;
+ /* offset of referenced register within struct pt_regs */
+ short reg_off;
+ /* whether arg should be interpreted as signed value */
+ bool arg_signed;
+ /* number of bits that need to be cleared and, optionally,
+ * sign-extended to cast arguments that are 1, 2, or 4 bytes
+ * long into final 8-byte u64/s64 value returned to user
+ */
+ char arg_bitshift;
+};
+
+/* should match USDT_MAX_ARG_CNT in usdt.c exactly */
+#define BPF_USDT_MAX_ARG_CNT 12
+struct __bpf_usdt_spec {
+ struct __bpf_usdt_arg_spec args[BPF_USDT_MAX_ARG_CNT];
+ __u64 usdt_cookie;
+ short arg_cnt;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, BPF_USDT_MAX_SPEC_CNT);
+ __type(key, int);
+ __type(value, struct __bpf_usdt_spec);
+} __bpf_usdt_specs SEC(".maps") __weak;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, BPF_USDT_MAX_IP_CNT);
+ __type(key, long);
+ __type(value, __u32);
+} __bpf_usdt_ip_to_spec_id SEC(".maps") __weak;
+
+/* don't rely on user's BPF code to have latest definition of bpf_func_id */
+enum bpf_func_id___usdt {
+ BPF_FUNC_get_attach_cookie___usdt = 0xBAD, /* value doesn't matter */
+};
+
+static __always_inline
+int __bpf_usdt_spec_id(struct pt_regs *ctx)
+{
+ if (!BPF_USDT_HAS_BPF_COOKIE) {
+ long ip = PT_REGS_IP(ctx);
+ int *spec_id_ptr;
+
+ spec_id_ptr = bpf_map_lookup_elem(&__bpf_usdt_ip_to_spec_id, &ip);
+ return spec_id_ptr ? *spec_id_ptr : -ESRCH;
+ }
+
+ return bpf_get_attach_cookie(ctx);
+}
+
+/* Return number of USDT arguments defined for currently traced USDT. */
+__weak __hidden
+int bpf_usdt_arg_cnt(struct pt_regs *ctx)
+{
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ return spec->arg_cnt;
+}
+
+/* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
+ * Returns 0 on success; negative error, otherwise.
+ * On error *res is guaranteed to be set to zero.
+ */
+__weak __hidden
+int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
+{
+ struct __bpf_usdt_spec *spec;
+ struct __bpf_usdt_arg_spec *arg_spec;
+ unsigned long val;
+ int err, spec_id;
+
+ *res = 0;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return -ESRCH;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return -ESRCH;
+
+ if (arg_num >= BPF_USDT_MAX_ARG_CNT || arg_num >= spec->arg_cnt)
+ return -ENOENT;
+
+ arg_spec = &spec->args[arg_num];
+ switch (arg_spec->arg_type) {
+ case BPF_USDT_ARG_CONST:
+ /* Arg is just a constant ("-4@$-9" in USDT arg spec).
+ * value is recorded in arg_spec->val_off directly.
+ */
+ val = arg_spec->val_off;
+ break;
+ case BPF_USDT_ARG_REG:
+ /* Arg is in a register (e.g, "8@%rax" in USDT arg spec),
+ * so we read the contents of that register directly from
+ * struct pt_regs. To keep things simple user-space parts
+ * record offsetof(struct pt_regs, <regname>) in arg_spec->reg_off.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ break;
+ case BPF_USDT_ARG_REG_DEREF:
+ /* Arg is in memory addressed by register, plus some offset
+ * (e.g., "-4@-1204(%rbp)" in USDT arg spec). Register is
+ * identified like with BPF_USDT_ARG_REG case, and the offset
+ * is in arg_spec->val_off. We first fetch register contents
+ * from pt_regs, then do another user-space probe read to
+ * fetch argument value itself.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_user(&val, sizeof(val), (void *)val + arg_spec->val_off);
+ if (err)
+ return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ val >>= arg_spec->arg_bitshift;
+#endif
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* cast arg from 1, 2, or 4 bytes to final 8 byte size clearing
+ * necessary upper arg_bitshift bits, with sign extension if argument
+ * is signed
+ */
+ val <<= arg_spec->arg_bitshift;
+ if (arg_spec->arg_signed)
+ val = ((long)val) >> arg_spec->arg_bitshift;
+ else
+ val = val >> arg_spec->arg_bitshift;
+ *res = val;
+ return 0;
+}
+
+/* Retrieve user-specified cookie value provided during attach as
+ * bpf_usdt_opts.usdt_cookie. This serves the same purpose as BPF cookie
+ * returned by bpf_get_attach_cookie(). Libbpf's support for USDT is itself
+ * utilizing BPF cookies internally, so user can't use BPF cookie directly
+ * for USDT programs and has to use bpf_usdt_cookie() API instead.
+ */
+__weak __hidden
+long bpf_usdt_cookie(struct pt_regs *ctx)
+{
+ struct __bpf_usdt_spec *spec;
+ int spec_id;
+
+ spec_id = __bpf_usdt_spec_id(ctx);
+ if (spec_id < 0)
+ return 0;
+
+ spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+ if (!spec)
+ return 0;
+
+ return spec->usdt_cookie;
+}
+
+/* we rely on ___bpf_apply() and ___bpf_narg() macros already defined in bpf_tracing.h */
+#define ___bpf_usdt_args0() ctx
+#define ___bpf_usdt_args1(x) ___bpf_usdt_args0(), ({ long _x; bpf_usdt_arg(ctx, 0, &_x); (void *)_x; })
+#define ___bpf_usdt_args2(x, args...) ___bpf_usdt_args1(args), ({ long _x; bpf_usdt_arg(ctx, 1, &_x); (void *)_x; })
+#define ___bpf_usdt_args3(x, args...) ___bpf_usdt_args2(args), ({ long _x; bpf_usdt_arg(ctx, 2, &_x); (void *)_x; })
+#define ___bpf_usdt_args4(x, args...) ___bpf_usdt_args3(args), ({ long _x; bpf_usdt_arg(ctx, 3, &_x); (void *)_x; })
+#define ___bpf_usdt_args5(x, args...) ___bpf_usdt_args4(args), ({ long _x; bpf_usdt_arg(ctx, 4, &_x); (void *)_x; })
+#define ___bpf_usdt_args6(x, args...) ___bpf_usdt_args5(args), ({ long _x; bpf_usdt_arg(ctx, 5, &_x); (void *)_x; })
+#define ___bpf_usdt_args7(x, args...) ___bpf_usdt_args6(args), ({ long _x; bpf_usdt_arg(ctx, 6, &_x); (void *)_x; })
+#define ___bpf_usdt_args8(x, args...) ___bpf_usdt_args7(args), ({ long _x; bpf_usdt_arg(ctx, 7, &_x); (void *)_x; })
+#define ___bpf_usdt_args9(x, args...) ___bpf_usdt_args8(args), ({ long _x; bpf_usdt_arg(ctx, 8, &_x); (void *)_x; })
+#define ___bpf_usdt_args10(x, args...) ___bpf_usdt_args9(args), ({ long _x; bpf_usdt_arg(ctx, 9, &_x); (void *)_x; })
+#define ___bpf_usdt_args11(x, args...) ___bpf_usdt_args10(args), ({ long _x; bpf_usdt_arg(ctx, 10, &_x); (void *)_x; })
+#define ___bpf_usdt_args12(x, args...) ___bpf_usdt_args11(args), ({ long _x; bpf_usdt_arg(ctx, 11, &_x); (void *)_x; })
+#define ___bpf_usdt_args(args...) ___bpf_apply(___bpf_usdt_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_USDT serves the same purpose for USDT handlers as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs and BPF_KPROBE for kprobes.
+ * Original struct pt_regs * context is preserved as 'ctx' argument.
+ */
+#define BPF_USDT(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_usdt_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
+#endif /* __USDT_BPF_H__ */
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
new file mode 100644
index 000000000000..f1c9339cfbbc
--- /dev/null
+++ b/tools/lib/bpf/usdt.c
@@ -0,0 +1,1518 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <unistd.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+
+/* s8 will be marked as poison while it's a reg of riscv */
+#if defined(__riscv)
+#define rv_s8 s8
+#endif
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_common.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+/* libbpf's USDT support consists of BPF-side state/code and user-space
+ * state/code working together in concert. BPF-side parts are defined in
+ * usdt.bpf.h header library. User-space state is encapsulated by struct
+ * usdt_manager and all the supporting code centered around usdt_manager.
+ *
+ * usdt.bpf.h defines two BPF maps that usdt_manager expects: USDT spec map
+ * and IP-to-spec-ID map, which is auxiliary map necessary for kernels that
+ * don't support BPF cookie (see below). These two maps are implicitly
+ * embedded into user's end BPF object file when user's code included
+ * usdt.bpf.h. This means that libbpf doesn't do anything special to create
+ * these USDT support maps. They are created by normal libbpf logic of
+ * instantiating BPF maps when opening and loading BPF object.
+ *
+ * As such, libbpf is basically unaware of the need to do anything
+ * USDT-related until the very first call to bpf_program__attach_usdt(), which
+ * can be called by user explicitly or happen automatically during skeleton
+ * attach (or, equivalently, through generic bpf_program__attach() call). At
+ * this point, libbpf will instantiate and initialize struct usdt_manager and
+ * store it in bpf_object. USDT manager is per-BPF object construct, as each
+ * independent BPF object might or might not have USDT programs, and thus all
+ * the expected USDT-related state. There is no coordination between two
+ * bpf_object in parts of USDT attachment, they are oblivious of each other's
+ * existence and libbpf is just oblivious, dealing with bpf_object-specific
+ * USDT state.
+ *
+ * Quick crash course on USDTs.
+ *
+ * From user-space application's point of view, USDT is essentially just
+ * a slightly special function call that normally has zero overhead, unless it
+ * is being traced by some external entity (e.g, BPF-based tool). Here's how
+ * a typical application can trigger USDT probe:
+ *
+ * #include <sys/sdt.h> // provided by systemtap-sdt-devel package
+ * // folly also provide similar functionality in folly/tracing/StaticTracepoint.h
+ *
+ * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y);
+ *
+ * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each
+ * individual USDT has a fixed number of arguments (3 in the above example)
+ * and specifies values of each argument as if it was a function call.
+ *
+ * USDT call is actually not a function call, but is instead replaced by
+ * a single NOP instruction (thus zero overhead, effectively). But in addition
+ * to that, those USDT macros generate special SHT_NOTE ELF records in
+ * .note.stapsdt ELF section. Here's an example USDT definition as emitted by
+ * `readelf -n <binary>`:
+ *
+ * stapsdt 0x00000089 NT_STAPSDT (SystemTap probe descriptors)
+ * Provider: test
+ * Name: usdt12
+ * Location: 0x0000000000549df3, Base: 0x00000000008effa4, Semaphore: 0x0000000000a4606e
+ * Arguments: -4@-1204(%rbp) -4@%edi -8@-1216(%rbp) -8@%r8 -4@$5 -8@%r9 8@%rdx 8@%r10 -4@$-9 -2@%cx -2@%ax -1@%sil
+ *
+ * In this case we have USDT test:usdt12 with 12 arguments.
+ *
+ * Location and base are offsets used to calculate absolute IP address of that
+ * NOP instruction that kernel can replace with an interrupt instruction to
+ * trigger instrumentation code (BPF program for all that we care about).
+ *
+ * Semaphore above is and optional feature. It records an address of a 2-byte
+ * refcount variable (normally in '.probes' ELF section) used for signaling if
+ * there is anything that is attached to USDT. This is useful for user
+ * applications if, for example, they need to prepare some arguments that are
+ * passed only to USDTs and preparation is expensive. By checking if USDT is
+ * "activated", an application can avoid paying those costs unnecessarily.
+ * Recent enough kernel has built-in support for automatically managing this
+ * refcount, which libbpf expects and relies on. If USDT is defined without
+ * associated semaphore, this value will be zero. See selftests for semaphore
+ * examples.
+ *
+ * Arguments is the most interesting part. This USDT specification string is
+ * providing information about all the USDT arguments and their locations. The
+ * part before @ sign defined byte size of the argument (1, 2, 4, or 8) and
+ * whether the argument is signed or unsigned (negative size means signed).
+ * The part after @ sign is assembly-like definition of argument location
+ * (see [0] for more details). Technically, assembler can provide some pretty
+ * advanced definitions, but libbpf is currently supporting three most common
+ * cases:
+ * 1) immediate constant, see 5th and 9th args above (-4@$5 and -4@-9);
+ * 2) register value, e.g., 8@%rdx, which means "unsigned 8-byte integer
+ * whose value is in register %rdx";
+ * 3) memory dereference addressed by register, e.g., -4@-1204(%rbp), which
+ * specifies signed 32-bit integer stored at offset -1204 bytes from
+ * memory address stored in %rbp.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ *
+ * During attachment, libbpf parses all the relevant USDT specifications and
+ * prepares `struct usdt_spec` (USDT spec), which is then provided to BPF-side
+ * code through spec map. This allows BPF applications to quickly fetch the
+ * actual value at runtime using a simple BPF-side code.
+ *
+ * With basics out of the way, let's go over less immediately obvious aspects
+ * of supporting USDTs.
+ *
+ * First, there is no special USDT BPF program type. It is actually just
+ * a uprobe BPF program (which for kernel, at least currently, is just a kprobe
+ * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference
+ * that uprobe is usually attached at the function entry, while USDT will
+ * normally will be somewhere inside the function. But it should always be
+ * pointing to NOP instruction, which makes such uprobes the fastest uprobe
+ * kind.
+ *
+ * Second, it's important to realize that such STAP_PROBEn(provider, name, ...)
+ * macro invocations can end up being inlined many-many times, depending on
+ * specifics of each individual user application. So single conceptual USDT
+ * (identified by provider:name pair of identifiers) is, generally speaking,
+ * multiple uprobe locations (USDT call sites) in different places in user
+ * application. Further, again due to inlining, each USDT call site might end
+ * up having the same argument #N be located in a different place. In one call
+ * site it could be a constant, in another will end up in a register, and in
+ * yet another could be some other register or even somewhere on the stack.
+ *
+ * As such, "attaching to USDT" means (in general case) attaching the same
+ * uprobe BPF program to multiple target locations in user application, each
+ * potentially having a completely different USDT spec associated with it.
+ * To wire all this up together libbpf allocates a unique integer spec ID for
+ * each unique USDT spec. Spec IDs are allocated as sequential small integers
+ * so that they can be used as keys in array BPF map (for performance reasons).
+ * Spec ID allocation and accounting is big part of what usdt_manager is
+ * about. This state has to be maintained per-BPF object and coordinate
+ * between different USDT attachments within the same BPF object.
+ *
+ * Spec ID is the key in spec BPF map, value is the actual USDT spec layed out
+ * as struct usdt_spec. Each invocation of BPF program at runtime needs to
+ * know its associated spec ID. It gets it either through BPF cookie, which
+ * libbpf sets to spec ID during attach time, or, if kernel is too old to
+ * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such
+ * case. The latter means that some modes of operation can't be supported
+ * without BPF cookie. Such mode is attaching to shared library "generically",
+ * without specifying target process. In such case, it's impossible to
+ * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode
+ * is not supported without BPF cookie support.
+ *
+ * Note that libbpf is using BPF cookie functionality for its own internal
+ * needs, so user itself can't rely on BPF cookie feature. To that end, libbpf
+ * provides conceptually equivalent USDT cookie support. It's still u64
+ * user-provided value that can be associated with USDT attachment. Note that
+ * this will be the same value for all USDT call sites within the same single
+ * *logical* USDT attachment. This makes sense because to user attaching to
+ * USDT is a single BPF program triggered for singular USDT probe. The fact
+ * that this is done at multiple actual locations is a mostly hidden
+ * implementation details. This USDT cookie value can be fetched with
+ * bpf_usdt_cookie(ctx) API provided by usdt.bpf.h
+ *
+ * Lastly, while single USDT can have tons of USDT call sites, it doesn't
+ * necessarily have that many different USDT specs. It very well might be
+ * that 1000 USDT call sites only need 5 different USDT specs, because all the
+ * arguments are typically contained in a small set of registers or stack
+ * locations. As such, it's wasteful to allocate as many USDT spec IDs as
+ * there are USDT call sites. So libbpf tries to be frugal and performs
+ * on-the-fly deduplication during a single USDT attachment to only allocate
+ * the minimal required amount of unique USDT specs (and thus spec IDs). This
+ * is trivially achieved by using USDT spec string (Arguments string from USDT
+ * note) as a lookup key in a hashmap. USDT spec string uniquely defines
+ * everything about how to fetch USDT arguments, so two USDT call sites
+ * sharing USDT spec string can safely share the same USDT spec and spec ID.
+ * Note, this spec string deduplication is happening only during the same USDT
+ * attachment, so each USDT spec shares the same USDT cookie value. This is
+ * not generally true for other USDT attachments within the same BPF object,
+ * as even if USDT spec string is the same, USDT cookie value can be
+ * different. It was deemed excessive to try to deduplicate across independent
+ * USDT attachments by taking into account USDT spec string *and* USDT cookie
+ * value, which would complicated spec ID accounting significantly for little
+ * gain.
+ */
+
+#define USDT_BASE_SEC ".stapsdt.base"
+#define USDT_SEMA_SEC ".probes"
+#define USDT_NOTE_SEC ".note.stapsdt"
+#define USDT_NOTE_TYPE 3
+#define USDT_NOTE_NAME "stapsdt"
+
+/* should match exactly enum __bpf_usdt_arg_type from usdt.bpf.h */
+enum usdt_arg_type {
+ USDT_ARG_CONST,
+ USDT_ARG_REG,
+ USDT_ARG_REG_DEREF,
+};
+
+/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
+struct usdt_arg_spec {
+ __u64 val_off;
+ enum usdt_arg_type arg_type;
+ short reg_off;
+ bool arg_signed;
+ char arg_bitshift;
+};
+
+/* should match BPF_USDT_MAX_ARG_CNT in usdt.bpf.h */
+#define USDT_MAX_ARG_CNT 12
+
+/* should match struct __bpf_usdt_spec from usdt.bpf.h */
+struct usdt_spec {
+ struct usdt_arg_spec args[USDT_MAX_ARG_CNT];
+ __u64 usdt_cookie;
+ short arg_cnt;
+};
+
+struct usdt_note {
+ const char *provider;
+ const char *name;
+ /* USDT args specification string, e.g.:
+ * "-4@%esi -4@-24(%rbp) -4@%ecx 2@%ax 8@%rdx"
+ */
+ const char *args;
+ long loc_addr;
+ long base_addr;
+ long sema_addr;
+};
+
+struct usdt_target {
+ long abs_ip;
+ long rel_ip;
+ long sema_off;
+ struct usdt_spec spec;
+ const char *spec_str;
+};
+
+struct usdt_manager {
+ struct bpf_map *specs_map;
+ struct bpf_map *ip_to_spec_id_map;
+
+ int *free_spec_ids;
+ size_t free_spec_cnt;
+ size_t next_free_spec_id;
+
+ bool has_bpf_cookie;
+ bool has_sema_refcnt;
+};
+
+struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
+{
+ static const char *ref_ctr_sysfs_path = "/sys/bus/event_source/devices/uprobe/format/ref_ctr_offset";
+ struct usdt_manager *man;
+ struct bpf_map *specs_map, *ip_to_spec_id_map;
+
+ specs_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_specs");
+ ip_to_spec_id_map = bpf_object__find_map_by_name(obj, "__bpf_usdt_ip_to_spec_id");
+ if (!specs_map || !ip_to_spec_id_map) {
+ pr_warn("usdt: failed to find USDT support BPF maps, did you forget to include bpf/usdt.bpf.h?\n");
+ return ERR_PTR(-ESRCH);
+ }
+
+ man = calloc(1, sizeof(*man));
+ if (!man)
+ return ERR_PTR(-ENOMEM);
+
+ man->specs_map = specs_map;
+ man->ip_to_spec_id_map = ip_to_spec_id_map;
+
+ /* Detect if BPF cookie is supported for kprobes.
+ * We don't need IP-to-ID mapping if we can use BPF cookies.
+ * Added in: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value")
+ */
+ man->has_bpf_cookie = kernel_supports(obj, FEAT_BPF_COOKIE);
+
+ /* Detect kernel support for automatic refcounting of USDT semaphore.
+ * If this is not supported, USDTs with semaphores will not be supported.
+ * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+ */
+ man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0;
+
+ return man;
+}
+
+void usdt_manager_free(struct usdt_manager *man)
+{
+ if (IS_ERR_OR_NULL(man))
+ return;
+
+ free(man->free_spec_ids);
+ free(man);
+}
+
+static int sanity_check_usdt_elf(Elf *elf, const char *path)
+{
+ GElf_Ehdr ehdr;
+ int endianness;
+
+ if (elf_kind(elf) != ELF_K_ELF) {
+ pr_warn("usdt: unrecognized ELF kind %d for '%s'\n", elf_kind(elf), path);
+ return -EBADF;
+ }
+
+ switch (gelf_getclass(elf)) {
+ case ELFCLASS64:
+ if (sizeof(void *) != 8) {
+ pr_warn("usdt: attaching to 64-bit ELF binary '%s' is not supported\n", path);
+ return -EBADF;
+ }
+ break;
+ case ELFCLASS32:
+ if (sizeof(void *) != 4) {
+ pr_warn("usdt: attaching to 32-bit ELF binary '%s' is not supported\n", path);
+ return -EBADF;
+ }
+ break;
+ default:
+ pr_warn("usdt: unsupported ELF class for '%s'\n", path);
+ return -EBADF;
+ }
+
+ if (!gelf_getehdr(elf, &ehdr))
+ return -EINVAL;
+
+ if (ehdr.e_type != ET_EXEC && ehdr.e_type != ET_DYN) {
+ pr_warn("usdt: unsupported type of ELF binary '%s' (%d), only ET_EXEC and ET_DYN are supported\n",
+ path, ehdr.e_type);
+ return -EBADF;
+ }
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ endianness = ELFDATA2LSB;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ endianness = ELFDATA2MSB;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ if (endianness != ehdr.e_ident[EI_DATA]) {
+ pr_warn("usdt: ELF endianness mismatch for '%s'\n", path);
+ return -EBADF;
+ }
+
+ return 0;
+}
+
+static int find_elf_sec_by_name(Elf *elf, const char *sec_name, GElf_Shdr *shdr, Elf_Scn **scn)
+{
+ Elf_Scn *sec = NULL;
+ size_t shstrndx;
+
+ if (elf_getshdrstrndx(elf, &shstrndx))
+ return -EINVAL;
+
+ /* check if ELF is corrupted and avoid calling elf_strptr if yes */
+ if (!elf_rawdata(elf_getscn(elf, shstrndx), NULL))
+ return -EINVAL;
+
+ while ((sec = elf_nextscn(elf, sec)) != NULL) {
+ char *name;
+
+ if (!gelf_getshdr(sec, shdr))
+ return -EINVAL;
+
+ name = elf_strptr(elf, shstrndx, shdr->sh_name);
+ if (name && strcmp(sec_name, name) == 0) {
+ *scn = sec;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+struct elf_seg {
+ long start;
+ long end;
+ long offset;
+ bool is_exec;
+};
+
+static int cmp_elf_segs(const void *_a, const void *_b)
+{
+ const struct elf_seg *a = _a;
+ const struct elf_seg *b = _b;
+
+ return a->start < b->start ? -1 : 1;
+}
+
+static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, size_t *seg_cnt)
+{
+ GElf_Phdr phdr;
+ size_t n;
+ int i, err;
+ struct elf_seg *seg;
+ void *tmp;
+
+ *seg_cnt = 0;
+
+ if (elf_getphdrnum(elf, &n)) {
+ err = -errno;
+ return err;
+ }
+
+ for (i = 0; i < n; i++) {
+ if (!gelf_getphdr(elf, i, &phdr)) {
+ err = -errno;
+ return err;
+ }
+
+ pr_debug("usdt: discovered PHDR #%d in '%s': vaddr 0x%lx memsz 0x%lx offset 0x%lx type 0x%lx flags 0x%lx\n",
+ i, path, (long)phdr.p_vaddr, (long)phdr.p_memsz, (long)phdr.p_offset,
+ (long)phdr.p_type, (long)phdr.p_flags);
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp)
+ return -ENOMEM;
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ (*seg_cnt)++;
+
+ seg->start = phdr.p_vaddr;
+ seg->end = phdr.p_vaddr + phdr.p_memsz;
+ seg->offset = phdr.p_offset;
+ seg->is_exec = phdr.p_flags & PF_X;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find PT_LOAD program headers in '%s'\n", path);
+ return -ESRCH;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ return 0;
+}
+
+static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt)
+{
+ char path[PATH_MAX], line[PATH_MAX], mode[16];
+ size_t seg_start, seg_end, seg_off;
+ struct elf_seg *seg;
+ int tmp_pid, i, err;
+ FILE *f;
+
+ *seg_cnt = 0;
+
+ /* Handle containerized binaries only accessible from
+ * /proc/<pid>/root/<path>. They will be reported as just /<path> in
+ * /proc/<pid>/maps.
+ */
+ if (sscanf(lib_path, "/proc/%d/root%s", &tmp_pid, path) == 2 && pid == tmp_pid)
+ goto proceed;
+
+ if (!realpath(lib_path, path)) {
+ pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n",
+ lib_path, -errno);
+ libbpf_strlcpy(path, lib_path, sizeof(path));
+ }
+
+proceed:
+ sprintf(line, "/proc/%d/maps", pid);
+ f = fopen(line, "r");
+ if (!f) {
+ err = -errno;
+ pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n",
+ line, lib_path, err);
+ return err;
+ }
+
+ /* We need to handle lines with no path at the end:
+ *
+ * 7f5c6f5d1000-7f5c6f5d3000 rw-p 001c7000 08:04 21238613 /usr/lib64/libc-2.17.so
+ * 7f5c6f5d3000-7f5c6f5d8000 rw-p 00000000 00:00 0
+ * 7f5c6f5d8000-7f5c6f5d9000 r-xp 00000000 103:01 362990598 /data/users/andriin/linux/tools/bpf/usdt/libhello_usdt.so
+ */
+ while (fscanf(f, "%zx-%zx %s %zx %*s %*d%[^\n]\n",
+ &seg_start, &seg_end, mode, &seg_off, line) == 5) {
+ void *tmp;
+
+ /* to handle no path case (see above) we need to capture line
+ * without skipping any whitespaces. So we need to strip
+ * leading whitespaces manually here
+ */
+ i = 0;
+ while (isblank(line[i]))
+ i++;
+ if (strcmp(line + i, path) != 0)
+ continue;
+
+ pr_debug("usdt: discovered segment for lib '%s': addrs %zx-%zx mode %s offset %zx\n",
+ path, seg_start, seg_end, mode, seg_off);
+
+ /* ignore non-executable sections for shared libs */
+ if (mode[2] != 'x')
+ continue;
+
+ tmp = libbpf_reallocarray(*segs, *seg_cnt + 1, sizeof(**segs));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ *segs = tmp;
+ seg = *segs + *seg_cnt;
+ *seg_cnt += 1;
+
+ seg->start = seg_start;
+ seg->end = seg_end;
+ seg->offset = seg_off;
+ seg->is_exec = true;
+ }
+
+ if (*seg_cnt == 0) {
+ pr_warn("usdt: failed to find '%s' (resolved to '%s') within PID %d memory mappings\n",
+ lib_path, path, pid);
+ err = -ESRCH;
+ goto err_out;
+ }
+
+ qsort(*segs, *seg_cnt, sizeof(**segs), cmp_elf_segs);
+ err = 0;
+err_out:
+ fclose(f);
+ return err;
+}
+
+static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative)
+{
+ struct elf_seg *seg;
+ int i;
+
+ if (relative) {
+ /* for shared libraries, address is relative offset and thus
+ * should be fall within logical offset-based range of
+ * [offset_start, offset_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start))
+ return seg;
+ }
+ } else {
+ /* for binaries, address is absolute and thus should be within
+ * absolute address range of [seg_start, seg_end)
+ */
+ for (i = 0, seg = segs; i < seg_cnt; i++, seg++) {
+ if (seg->start <= addr && addr < seg->end)
+ return seg;
+ }
+ }
+
+ return NULL;
+}
+
+static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
+ GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *usdt_note);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
+
+static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
+ const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
+ struct usdt_target **out_targets, size_t *out_target_cnt)
+{
+ size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0;
+ struct elf_seg *segs = NULL, *lib_segs = NULL;
+ struct usdt_target *targets = NULL, *target;
+ long base_addr = 0;
+ Elf_Scn *notes_scn, *base_scn;
+ GElf_Shdr base_shdr, notes_shdr;
+ GElf_Ehdr ehdr;
+ GElf_Nhdr nhdr;
+ Elf_Data *data;
+ int err;
+
+ *out_targets = NULL;
+ *out_target_cnt = 0;
+
+ err = find_elf_sec_by_name(elf, USDT_NOTE_SEC, &notes_shdr, &notes_scn);
+ if (err) {
+ pr_warn("usdt: no USDT notes section (%s) found in '%s'\n", USDT_NOTE_SEC, path);
+ return err;
+ }
+
+ if (notes_shdr.sh_type != SHT_NOTE || !gelf_getehdr(elf, &ehdr)) {
+ pr_warn("usdt: invalid USDT notes section (%s) in '%s'\n", USDT_NOTE_SEC, path);
+ return -EINVAL;
+ }
+
+ err = parse_elf_segs(elf, path, &segs, &seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err);
+ goto err_out;
+ }
+
+ /* .stapsdt.base ELF section is optional, but is used for prelink
+ * offset compensation (see a big comment further below)
+ */
+ if (find_elf_sec_by_name(elf, USDT_BASE_SEC, &base_shdr, &base_scn) == 0)
+ base_addr = base_shdr.sh_addr;
+
+ data = elf_getdata(notes_scn, 0);
+ off = 0;
+ while ((off = gelf_getnote(data, off, &nhdr, &name_off, &desc_off)) > 0) {
+ long usdt_abs_ip, usdt_rel_ip, usdt_sema_off = 0;
+ struct usdt_note note;
+ struct elf_seg *seg = NULL;
+ void *tmp;
+
+ err = parse_usdt_note(elf, path, base_addr, &nhdr,
+ data->d_buf, name_off, desc_off, &note);
+ if (err)
+ goto err_out;
+
+ if (strcmp(note.provider, usdt_provider) != 0 || strcmp(note.name, usdt_name) != 0)
+ continue;
+
+ /* We need to compensate "prelink effect". See [0] for details,
+ * relevant parts quoted here:
+ *
+ * Each SDT probe also expands into a non-allocated ELF note. You can
+ * find this by looking at SHT_NOTE sections and decoding the format;
+ * see below for details. Because the note is non-allocated, it means
+ * there is no runtime cost, and also preserved in both stripped files
+ * and .debug files.
+ *
+ * However, this means that prelink won't adjust the note's contents
+ * for address offsets. Instead, this is done via the .stapsdt.base
+ * section. This is a special section that is added to the text. We
+ * will only ever have one of these sections in a final link and it
+ * will only ever be one byte long. Nothing about this section itself
+ * matters, we just use it as a marker to detect prelink address
+ * adjustments.
+ *
+ * Each probe note records the link-time address of the .stapsdt.base
+ * section alongside the probe PC address. The decoder compares the
+ * base address stored in the note with the .stapsdt.base section's
+ * sh_addr. Initially these are the same, but the section header will
+ * be adjusted by prelink. So the decoder applies the difference to
+ * the probe PC address to get the correct prelinked PC address; the
+ * same adjustment is applied to the semaphore address, if any.
+ *
+ * [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ */
+ usdt_rel_ip = usdt_abs_ip = note.loc_addr;
+ if (base_addr) {
+ usdt_abs_ip += base_addr - note.base_addr;
+ usdt_rel_ip += base_addr - note.base_addr;
+ }
+
+ if (ehdr.e_type == ET_EXEC) {
+ /* When attaching uprobes (which what USDTs basically
+ * are) kernel expects a relative IP to be specified,
+ * so if we are attaching to an executable ELF binary
+ * (i.e., not a shared library), we need to calculate
+ * proper relative IP based on ELF's load address
+ */
+ seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_abs_ip);
+ goto err_out;
+ }
+ if (!seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ usdt_abs_ip);
+ goto err_out;
+ }
+
+ usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset);
+ } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */
+ /* If we don't have BPF cookie support but need to
+ * attach to a shared library, we'll need to know and
+ * record absolute addresses of attach points due to
+ * the need to lookup USDT spec by absolute IP of
+ * triggered uprobe. Doing this resolution is only
+ * possible when we have a specific PID of the process
+ * that's using specified shared library. BPF cookie
+ * removes the absolute address limitation as we don't
+ * need to do this lookup (we just use BPF cookie as
+ * an index of USDT spec), so for newer kernels with
+ * BPF cookie support libbpf supports USDT attachment
+ * to shared libraries with no PID filter.
+ */
+ if (pid < 0) {
+ pr_warn("usdt: attaching to shared libraries without specific PID is not supported on current kernel\n");
+ err = -ENOTSUP;
+ goto err_out;
+ }
+
+ /* lib_segs are lazily initialized only if necessary */
+ if (lib_seg_cnt == 0) {
+ err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt);
+ if (err) {
+ pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n",
+ pid, path, err);
+ goto err_out;
+ }
+ }
+
+ seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n",
+ usdt_provider, usdt_name, path, usdt_rel_ip);
+ goto err_out;
+ }
+
+ usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset);
+ }
+
+ pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n",
+ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", path,
+ note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args,
+ seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0);
+
+ /* Adjust semaphore address to be a relative offset */
+ if (note.sema_addr) {
+ if (!man->has_sema_refcnt) {
+ pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n",
+ usdt_provider, usdt_name, path);
+ err = -ENOTSUP;
+ goto err_out;
+ }
+
+ seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */);
+ if (!seg) {
+ err = -ESRCH;
+ pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n",
+ usdt_provider, usdt_name, path, note.sema_addr);
+ goto err_out;
+ }
+ if (seg->is_exec) {
+ err = -ESRCH;
+ pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx] for semaphore of '%s:%s' at 0x%lx is executable\n",
+ path, seg->start, seg->end, usdt_provider, usdt_name,
+ note.sema_addr);
+ goto err_out;
+ }
+
+ usdt_sema_off = note.sema_addr - (seg->start - seg->offset);
+
+ pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n",
+ usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ",
+ path, note.sema_addr, note.base_addr, usdt_sema_off,
+ seg->start, seg->end, seg->offset);
+ }
+
+ /* Record adjusted addresses and offsets and parse USDT spec */
+ tmp = libbpf_reallocarray(targets, target_cnt + 1, sizeof(*targets));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ targets = tmp;
+
+ target = &targets[target_cnt];
+ memset(target, 0, sizeof(*target));
+
+ target->abs_ip = usdt_abs_ip;
+ target->rel_ip = usdt_rel_ip;
+ target->sema_off = usdt_sema_off;
+
+ /* notes->args references strings from Elf itself, so they can
+ * be referenced safely until elf_end() call
+ */
+ target->spec_str = note.args;
+
+ err = parse_usdt_spec(&target->spec, &note, usdt_cookie);
+ if (err)
+ goto err_out;
+
+ target_cnt++;
+ }
+
+ *out_targets = targets;
+ *out_target_cnt = target_cnt;
+ err = target_cnt;
+
+err_out:
+ free(segs);
+ free(lib_segs);
+ if (err < 0)
+ free(targets);
+ return err;
+}
+
+struct bpf_link_usdt {
+ struct bpf_link link;
+
+ struct usdt_manager *usdt_man;
+
+ size_t spec_cnt;
+ int *spec_ids;
+
+ size_t uprobe_cnt;
+ struct {
+ long abs_ip;
+ struct bpf_link *link;
+ } *uprobes;
+};
+
+static int bpf_link_usdt_detach(struct bpf_link *link)
+{
+ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link);
+ struct usdt_manager *man = usdt_link->usdt_man;
+ int i;
+
+ for (i = 0; i < usdt_link->uprobe_cnt; i++) {
+ /* detach underlying uprobe link */
+ bpf_link__destroy(usdt_link->uprobes[i].link);
+ /* there is no need to update specs map because it will be
+ * unconditionally overwritten on subsequent USDT attaches,
+ * but if BPF cookies are not used we need to remove entry
+ * from ip_to_spec_id map, otherwise we'll run into false
+ * conflicting IP errors
+ */
+ if (!man->has_bpf_cookie) {
+ /* not much we can do about errors here */
+ (void)bpf_map_delete_elem(bpf_map__fd(man->ip_to_spec_id_map),
+ &usdt_link->uprobes[i].abs_ip);
+ }
+ }
+
+ /* try to return the list of previously used spec IDs to usdt_manager
+ * for future reuse for subsequent USDT attaches
+ */
+ if (!man->free_spec_ids) {
+ /* if there were no free spec IDs yet, just transfer our IDs */
+ man->free_spec_ids = usdt_link->spec_ids;
+ man->free_spec_cnt = usdt_link->spec_cnt;
+ usdt_link->spec_ids = NULL;
+ } else {
+ /* otherwise concat IDs */
+ size_t new_cnt = man->free_spec_cnt + usdt_link->spec_cnt;
+ int *new_free_ids;
+
+ new_free_ids = libbpf_reallocarray(man->free_spec_ids, new_cnt,
+ sizeof(*new_free_ids));
+ /* If we couldn't resize free_spec_ids, we'll just leak
+ * a bunch of free IDs; this is very unlikely to happen and if
+ * system is so exhausted on memory, it's the least of user's
+ * concerns, probably.
+ * So just do our best here to return those IDs to usdt_manager.
+ */
+ if (new_free_ids) {
+ memcpy(new_free_ids + man->free_spec_cnt, usdt_link->spec_ids,
+ usdt_link->spec_cnt * sizeof(*usdt_link->spec_ids));
+ man->free_spec_ids = new_free_ids;
+ man->free_spec_cnt = new_cnt;
+ }
+ }
+
+ return 0;
+}
+
+static void bpf_link_usdt_dealloc(struct bpf_link *link)
+{
+ struct bpf_link_usdt *usdt_link = container_of(link, struct bpf_link_usdt, link);
+
+ free(usdt_link->spec_ids);
+ free(usdt_link->uprobes);
+ free(usdt_link);
+}
+
+static size_t specs_hash_fn(const void *key, void *ctx)
+{
+ const char *s = key;
+
+ return str_hash(s);
+}
+
+static bool specs_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+ const char *s1 = key1;
+ const char *s2 = key2;
+
+ return strcmp(s1, s2) == 0;
+}
+
+static int allocate_spec_id(struct usdt_manager *man, struct hashmap *specs_hash,
+ struct bpf_link_usdt *link, struct usdt_target *target,
+ int *spec_id, bool *is_new)
+{
+ void *tmp;
+ int err;
+
+ /* check if we already allocated spec ID for this spec string */
+ if (hashmap__find(specs_hash, target->spec_str, &tmp)) {
+ *spec_id = (long)tmp;
+ *is_new = false;
+ return 0;
+ }
+
+ /* otherwise it's a new ID that needs to be set up in specs map and
+ * returned back to usdt_manager when USDT link is detached
+ */
+ tmp = libbpf_reallocarray(link->spec_ids, link->spec_cnt + 1, sizeof(*link->spec_ids));
+ if (!tmp)
+ return -ENOMEM;
+ link->spec_ids = tmp;
+
+ /* get next free spec ID, giving preference to free list, if not empty */
+ if (man->free_spec_cnt) {
+ *spec_id = man->free_spec_ids[man->free_spec_cnt - 1];
+
+ /* cache spec ID for current spec string for future lookups */
+ err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
+ if (err)
+ return err;
+
+ man->free_spec_cnt--;
+ } else {
+ /* don't allocate spec ID bigger than what fits in specs map */
+ if (man->next_free_spec_id >= bpf_map__max_entries(man->specs_map))
+ return -E2BIG;
+
+ *spec_id = man->next_free_spec_id;
+
+ /* cache spec ID for current spec string for future lookups */
+ err = hashmap__add(specs_hash, target->spec_str, (void *)(long)*spec_id);
+ if (err)
+ return err;
+
+ man->next_free_spec_id++;
+ }
+
+ /* remember new spec ID in the link for later return back to free list on detach */
+ link->spec_ids[link->spec_cnt] = *spec_id;
+ link->spec_cnt++;
+ *is_new = true;
+ return 0;
+}
+
+struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct bpf_program *prog,
+ pid_t pid, const char *path,
+ const char *usdt_provider, const char *usdt_name,
+ __u64 usdt_cookie)
+{
+ int i, fd, err, spec_map_fd, ip_map_fd;
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct hashmap *specs_hash = NULL;
+ struct bpf_link_usdt *link = NULL;
+ struct usdt_target *targets = NULL;
+ size_t target_cnt;
+ Elf *elf;
+
+ spec_map_fd = bpf_map__fd(man->specs_map);
+ ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map);
+
+ /* TODO: perform path resolution similar to uprobe's */
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ err = -errno;
+ pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err);
+ return libbpf_err_ptr(err);
+ }
+
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ err = -EBADF;
+ pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1));
+ goto err_out;
+ }
+
+ err = sanity_check_usdt_elf(elf, path);
+ if (err)
+ goto err_out;
+
+ /* normalize PID filter */
+ if (pid < 0)
+ pid = -1;
+ else if (pid == 0)
+ pid = getpid();
+
+ /* discover USDT in given binary, optionally limiting
+ * activations to a given PID, if pid > 0
+ */
+ err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name,
+ usdt_cookie, &targets, &target_cnt);
+ if (err <= 0) {
+ err = (err == 0) ? -ENOENT : err;
+ goto err_out;
+ }
+
+ specs_hash = hashmap__new(specs_hash_fn, specs_equal_fn, NULL);
+ if (IS_ERR(specs_hash)) {
+ err = PTR_ERR(specs_hash);
+ goto err_out;
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ link->usdt_man = man;
+ link->link.detach = &bpf_link_usdt_detach;
+ link->link.dealloc = &bpf_link_usdt_dealloc;
+
+ link->uprobes = calloc(target_cnt, sizeof(*link->uprobes));
+ if (!link->uprobes) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ for (i = 0; i < target_cnt; i++) {
+ struct usdt_target *target = &targets[i];
+ struct bpf_link *uprobe_link;
+ bool is_new;
+ int spec_id;
+
+ /* Spec ID can be either reused or newly allocated. If it is
+ * newly allocated, we'll need to fill out spec map, otherwise
+ * entire spec should be valid and can be just used by a new
+ * uprobe. We reuse spec when USDT arg spec is identical. We
+ * also never share specs between two different USDT
+ * attachments ("links"), so all the reused specs already
+ * share USDT cookie value implicitly.
+ */
+ err = allocate_spec_id(man, specs_hash, link, target, &spec_id, &is_new);
+ if (err)
+ goto err_out;
+
+ if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) {
+ err = -errno;
+ pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n",
+ spec_id, usdt_provider, usdt_name, path, err);
+ goto err_out;
+ }
+ if (!man->has_bpf_cookie &&
+ bpf_map_update_elem(ip_map_fd, &target->abs_ip, &spec_id, BPF_NOEXIST)) {
+ err = -errno;
+ if (err == -EEXIST) {
+ pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n",
+ spec_id, usdt_provider, usdt_name, path);
+ } else {
+ pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n",
+ target->abs_ip, spec_id, usdt_provider, usdt_name,
+ path, err);
+ }
+ goto err_out;
+ }
+
+ opts.ref_ctr_offset = target->sema_off;
+ opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0;
+ uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path,
+ target->rel_ip, &opts);
+ err = libbpf_get_error(uprobe_link);
+ if (err) {
+ pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n",
+ i, usdt_provider, usdt_name, path, err);
+ goto err_out;
+ }
+
+ link->uprobes[i].link = uprobe_link;
+ link->uprobes[i].abs_ip = target->abs_ip;
+ link->uprobe_cnt++;
+ }
+
+ free(targets);
+ hashmap__free(specs_hash);
+ elf_end(elf);
+ close(fd);
+
+ return &link->link;
+
+err_out:
+ if (link)
+ bpf_link__destroy(&link->link);
+ free(targets);
+ hashmap__free(specs_hash);
+ if (elf)
+ elf_end(elf);
+ close(fd);
+ return libbpf_err_ptr(err);
+}
+
+/* Parse out USDT ELF note from '.note.stapsdt' section.
+ * Logic inspired by perf's code.
+ */
+static int parse_usdt_note(Elf *elf, const char *path, long base_addr,
+ GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
+ struct usdt_note *note)
+{
+ const char *provider, *name, *args;
+ long addrs[3];
+ size_t len;
+
+ /* sanity check USDT note name and type first */
+ if (strncmp(data + name_off, USDT_NOTE_NAME, nhdr->n_namesz) != 0)
+ return -EINVAL;
+ if (nhdr->n_type != USDT_NOTE_TYPE)
+ return -EINVAL;
+
+ /* sanity check USDT note contents ("description" in ELF terminology) */
+ len = nhdr->n_descsz;
+ data = data + desc_off;
+
+ /* +3 is the very minimum required to store three empty strings */
+ if (len < sizeof(addrs) + 3)
+ return -EINVAL;
+
+ /* get location, base, and semaphore addrs */
+ memcpy(&addrs, data, sizeof(addrs));
+
+ /* parse string fields: provider, name, args */
+ provider = data + sizeof(addrs);
+
+ name = (const char *)memchr(provider, '\0', data + len - provider);
+ if (!name) /* non-zero-terminated provider */
+ return -EINVAL;
+ name++;
+ if (name >= data + len || *name == '\0') /* missing or empty name */
+ return -EINVAL;
+
+ args = memchr(name, '\0', data + len - name);
+ if (!args) /* non-zero-terminated name */
+ return -EINVAL;
+ ++args;
+ if (args >= data + len) /* missing arguments spec */
+ return -EINVAL;
+
+ note->provider = provider;
+ note->name = name;
+ if (*args == '\0' || *args == ':')
+ note->args = "";
+ else
+ note->args = args;
+ note->loc_addr = addrs[0];
+ note->base_addr = addrs[1];
+ note->sema_addr = addrs[2];
+
+ return 0;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg);
+
+static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie)
+{
+ const char *s;
+ int len;
+
+ spec->usdt_cookie = usdt_cookie;
+ spec->arg_cnt = 0;
+
+ s = note->args;
+ while (s[0]) {
+ if (spec->arg_cnt >= USDT_MAX_ARG_CNT) {
+ pr_warn("usdt: too many USDT arguments (> %d) for '%s:%s' with args spec '%s'\n",
+ USDT_MAX_ARG_CNT, note->provider, note->name, note->args);
+ return -E2BIG;
+ }
+
+ len = parse_usdt_arg(s, spec->arg_cnt, &spec->args[spec->arg_cnt]);
+ if (len < 0)
+ return len;
+
+ s += len;
+ spec->arg_cnt++;
+ }
+
+ return 0;
+}
+
+/* Architecture-specific logic for parsing USDT argument location specs */
+
+#if defined(__x86_64__) || defined(__i386__)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ static struct {
+ const char *names[4];
+ size_t pt_regs_off;
+ } reg_map[] = {
+#ifdef __x86_64__
+#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg64)
+#else
+#define reg_off(reg64, reg32) offsetof(struct pt_regs, reg32)
+#endif
+ { {"rip", "eip", "", ""}, reg_off(rip, eip) },
+ { {"rax", "eax", "ax", "al"}, reg_off(rax, eax) },
+ { {"rbx", "ebx", "bx", "bl"}, reg_off(rbx, ebx) },
+ { {"rcx", "ecx", "cx", "cl"}, reg_off(rcx, ecx) },
+ { {"rdx", "edx", "dx", "dl"}, reg_off(rdx, edx) },
+ { {"rsi", "esi", "si", "sil"}, reg_off(rsi, esi) },
+ { {"rdi", "edi", "di", "dil"}, reg_off(rdi, edi) },
+ { {"rbp", "ebp", "bp", "bpl"}, reg_off(rbp, ebp) },
+ { {"rsp", "esp", "sp", "spl"}, reg_off(rsp, esp) },
+#undef reg_off
+#ifdef __x86_64__
+ { {"r8", "r8d", "r8w", "r8b"}, offsetof(struct pt_regs, r8) },
+ { {"r9", "r9d", "r9w", "r9b"}, offsetof(struct pt_regs, r9) },
+ { {"r10", "r10d", "r10w", "r10b"}, offsetof(struct pt_regs, r10) },
+ { {"r11", "r11d", "r11w", "r11b"}, offsetof(struct pt_regs, r11) },
+ { {"r12", "r12d", "r12w", "r12b"}, offsetof(struct pt_regs, r12) },
+ { {"r13", "r13d", "r13w", "r13b"}, offsetof(struct pt_regs, r13) },
+ { {"r14", "r14d", "r14w", "r14b"}, offsetof(struct pt_regs, r14) },
+ { {"r15", "r15d", "r15w", "r15b"}, offsetof(struct pt_regs, r15) },
+#endif
+ };
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ for (j = 0; j < ARRAY_SIZE(reg_map[i].names); j++) {
+ if (strcmp(reg_name, reg_map[i].names[j]) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ char *reg_name = NULL;
+ int arg_sz, len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%m[^)] ) %n", &arg_sz, &off, &reg_name, &len) == 3) {
+ /* Memory dereference case, e.g., -4@-20(%rbp) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %%%ms %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Register read case, e.g., -4@%eax */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ $%ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@$71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__s390x__)
+
+/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ unsigned int reg;
+ int arg_sz, len;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%r%u ) %n", &arg_sz, &off, &reg, &len) == 3) {
+ /* Memory dereference case, e.g., -2@-28(%r15) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %%r%u %n", &arg_sz, &reg, &len) == 2) {
+ /* Register read case, e.g., -8@%r0 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ if (reg > 15) {
+ pr_warn("usdt: unrecognized register '%%r%u'\n", reg);
+ return -EINVAL;
+ }
+ arg->reg_off = offsetof(user_pt_regs, gprs[reg]);
+ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@71 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__aarch64__)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ int reg_num;
+
+ if (sscanf(reg_name, "x%d", &reg_num) == 1) {
+ if (reg_num >= 0 && reg_num < 31)
+ return offsetof(struct user_pt_regs, regs[reg_num]);
+ } else if (strcmp(reg_name, "sp") == 0) {
+ return offsetof(struct user_pt_regs, sp);
+ }
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ char *reg_name = NULL;
+ int arg_sz, len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ \[ %m[a-z0-9], %ld ] %n", &arg_sz, &reg_name, &off, &len) == 3) {
+ /* Memory dereference case, e.g., -4@[sp, 96] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ \[ %m[a-z0-9] ] %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Memory dereference case, e.g., -4@[sp] */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@x4 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#elif defined(__riscv)
+
+static int calc_pt_regs_off(const char *reg_name)
+{
+ static struct {
+ const char *name;
+ size_t pt_regs_off;
+ } reg_map[] = {
+ { "ra", offsetof(struct user_regs_struct, ra) },
+ { "sp", offsetof(struct user_regs_struct, sp) },
+ { "gp", offsetof(struct user_regs_struct, gp) },
+ { "tp", offsetof(struct user_regs_struct, tp) },
+ { "a0", offsetof(struct user_regs_struct, a0) },
+ { "a1", offsetof(struct user_regs_struct, a1) },
+ { "a2", offsetof(struct user_regs_struct, a2) },
+ { "a3", offsetof(struct user_regs_struct, a3) },
+ { "a4", offsetof(struct user_regs_struct, a4) },
+ { "a5", offsetof(struct user_regs_struct, a5) },
+ { "a6", offsetof(struct user_regs_struct, a6) },
+ { "a7", offsetof(struct user_regs_struct, a7) },
+ { "s0", offsetof(struct user_regs_struct, s0) },
+ { "s1", offsetof(struct user_regs_struct, s1) },
+ { "s2", offsetof(struct user_regs_struct, s2) },
+ { "s3", offsetof(struct user_regs_struct, s3) },
+ { "s4", offsetof(struct user_regs_struct, s4) },
+ { "s5", offsetof(struct user_regs_struct, s5) },
+ { "s6", offsetof(struct user_regs_struct, s6) },
+ { "s7", offsetof(struct user_regs_struct, s7) },
+ { "s8", offsetof(struct user_regs_struct, rv_s8) },
+ { "s9", offsetof(struct user_regs_struct, s9) },
+ { "s10", offsetof(struct user_regs_struct, s10) },
+ { "s11", offsetof(struct user_regs_struct, s11) },
+ { "t0", offsetof(struct user_regs_struct, t0) },
+ { "t1", offsetof(struct user_regs_struct, t1) },
+ { "t2", offsetof(struct user_regs_struct, t2) },
+ { "t3", offsetof(struct user_regs_struct, t3) },
+ { "t4", offsetof(struct user_regs_struct, t4) },
+ { "t5", offsetof(struct user_regs_struct, t5) },
+ { "t6", offsetof(struct user_regs_struct, t6) },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(reg_map); i++) {
+ if (strcmp(reg_name, reg_map[i].name) == 0)
+ return reg_map[i].pt_regs_off;
+ }
+
+ pr_warn("usdt: unrecognized register '%s'\n", reg_name);
+ return -ENOENT;
+}
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ char *reg_name = NULL;
+ int arg_sz, len, reg_off;
+ long off;
+
+ if (sscanf(arg_str, " %d @ %ld ( %m[a-z0-9] ) %n", &arg_sz, &off, &reg_name, &len) == 3) {
+ /* Memory dereference case, e.g., -8@-88(s0) */
+ arg->arg_type = USDT_ARG_REG_DEREF;
+ arg->val_off = off;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else if (sscanf(arg_str, " %d @ %ld %n", &arg_sz, &off, &len) == 2) {
+ /* Constant value case, e.g., 4@5 */
+ arg->arg_type = USDT_ARG_CONST;
+ arg->val_off = off;
+ arg->reg_off = 0;
+ } else if (sscanf(arg_str, " %d @ %m[a-z0-9] %n", &arg_sz, &reg_name, &len) == 2) {
+ /* Register read case, e.g., -8@a1 */
+ arg->arg_type = USDT_ARG_REG;
+ arg->val_off = 0;
+ reg_off = calc_pt_regs_off(reg_name);
+ free(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+ } else {
+ pr_warn("usdt: unrecognized arg #%d spec '%s'\n", arg_num, arg_str);
+ return -EINVAL;
+ }
+
+ arg->arg_signed = arg_sz < 0;
+ if (arg_sz < 0)
+ arg_sz = -arg_sz;
+
+ switch (arg_sz) {
+ case 1: case 2: case 4: case 8:
+ arg->arg_bitshift = 64 - arg_sz * 8;
+ break;
+ default:
+ pr_warn("usdt: unsupported arg #%d (spec '%s') size: %d\n",
+ arg_num, arg_str, arg_sz);
+ return -EINVAL;
+ }
+
+ return len;
+}
+
+#else
+
+static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg)
+{
+ pr_warn("usdt: libbpf doesn't support USDTs on current architecture\n");
+ return -ENOTSUP;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3820608faf57..bafdc5373a13 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -168,9 +168,15 @@ $(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
-$(OUTPUT)/urandom_read: urandom_read.c
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
+ $(call msg,LIB,,$@)
+ $(Q)$(CC) $(CFLAGS) -fPIC $(LDFLAGS) $^ $(LDLIBS) --shared -o $@
+
+$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.c,$^) \
+ liburandom_read.so $(LDLIBS) \
+ -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
$(call msg,MOD,,$@)
@@ -328,12 +334,8 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
linked_vars.skel.h linked_maps.skel.h \
- test_subskeleton.skel.h test_subskeleton_lib.skel.h
-
-# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
-# but that's created as a side-effect of the skel.h generation.
-test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
-test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+ test_subskeleton.skel.h test_subskeleton_lib.skel.h \
+ test_usdt.skel.h
LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
@@ -346,6 +348,11 @@ test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
+# but that's created as a side-effect of the skel.h generation.
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+test_usdt.skel.h-deps := test_usdt.o test_usdt_multispec.o
LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
@@ -400,6 +407,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
$(wildcard $(BPFDIR)/bpf_*.h) \
+ $(wildcard $(BPFDIR)/*.bpf.h) \
| $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
@@ -491,6 +499,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
btf_helpers.c flow_dissector_load.h \
cap_helpers.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
+ $(OUTPUT)/liburandom_read.so \
ima_setup.sh \
$(wildcard progs/btf_dump_test_case_*.c)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index f973320e6dbf..f061cc20e776 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -8,7 +8,6 @@
#include <fcntl.h>
#include <pthread.h>
#include <sys/sysinfo.h>
-#include <sys/resource.h>
#include <signal.h>
#include "bench.h"
#include "testing_helpers.h"
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
deleted file mode 100644
index 9dac9b30f8ef..000000000000
--- a/tools/testing/selftests/bpf/bpf_rlimit.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <sys/resource.h>
-#include <stdio.h>
-
-static __attribute__((constructor)) void bpf_rlimit_ctor(void)
-{
- struct rlimit rlim_old, rlim_new = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
-
- getrlimit(RLIMIT_MEMLOCK, &rlim_old);
- /* For the sake of running the test cases, we temporarily
- * set rlimit to infinity in order for kernel to focus on
- * errors from actual test cases and not getting noise
- * from hitting memlock limits. The limit is on per-process
- * basis and not a global one, hence destructor not really
- * needed here.
- */
- if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
- perror("Unable to lift memlock rlimit");
- /* Trying out lower limit, but expect potential test
- * case failures from this!
- */
- rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
- rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
- setrlimit(RLIMIT_MEMLOCK, &rlim_new);
- }
-}
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 87fd1aa323a9..c8be6406777f 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -11,7 +11,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "flow_dissector_load.h"
const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
@@ -25,9 +24,8 @@ static void load_and_attach_program(void)
int prog_fd, ret;
struct bpf_object *obj;
- ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- if (ret)
- error(1, 0, "failed to enable libbpf strict mode: %d", ret);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name,
cfg_map_name, NULL, &prog_fd, NULL);
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index 3a7b82bd9e94..e021cc67dc02 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -20,7 +20,6 @@
#include "cgroup_helpers.h"
#include "testing_helpers.h"
-#include "bpf_rlimit.h"
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
@@ -67,6 +66,9 @@ int main(int argc, char **argv)
if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
return 1;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto cleanup_cgroup_env;
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
new file mode 100644
index 000000000000..b17bfa0e0aac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void init_test_filter_set(struct test_filter_set *set)
+{
+ set->cnt = 0;
+ set->tests = NULL;
+}
+
+static void free_test_filter_set(struct test_filter_set *set)
+{
+ int i, j;
+
+ for (i = 0; i < set->cnt; i++) {
+ for (j = 0; j < set->tests[i].subtest_cnt; j++)
+ free((void *)set->tests[i].subtests[j]);
+ free(set->tests[i].subtests);
+ free(set->tests[i].name);
+ }
+
+ free(set->tests);
+ init_test_filter_set(set);
+}
+
+static void test_parse_test_list(void)
+{
+ struct test_filter_set set;
+
+ init_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "test filters count"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "subtest name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie",
+ &set,
+ true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+ "subtest name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true),
+ "parsing");
+ ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing");
+ ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing");
+ if (!ASSERT_EQ(set.cnt, 3, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+ "subtest name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name");
+error:
+ free_test_filter_set(&set);
+}
+
+void test_arg_parsing(void)
+{
+ if (test__start_subtest("test_parse_test_list"))
+ test_parse_test_list();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index d48f6e533e1e..c0c6d410751d 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -11,15 +11,22 @@ static void trigger_func(void)
asm volatile ("");
}
+/* attach point for byname uprobe */
+static void trigger_func2(void)
+{
+ asm volatile ("");
+}
+
void test_attach_probe(void)
{
DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
- int duration = 0;
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
ssize_t uprobe_offset, ref_ctr_offset;
+ struct bpf_link *uprobe_err_link;
bool legacy;
+ char *mem;
/* Check if new-style kprobe/uprobe API is supported.
* Kernels that support new FD-based kprobe and uprobe BPF attachment
@@ -43,9 +50,9 @@ void test_attach_probe(void)
return;
skel = test_attach_probe__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
goto cleanup;
kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
@@ -90,25 +97,73 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
- /* trigger & validate kprobe && kretprobe */
- usleep(1);
+ /* verify auto-attach fails for old-style uprobe definition */
+ uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+ if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+ "auto-attach should fail for old-style name"))
+ goto cleanup;
+
+ uprobe_opts.func_name = "trigger_func2";
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = 0;
+ skel->links.handle_uprobe_byname =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname,
+ 0 /* this pid */,
+ "/proc/self/exe",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
+ goto cleanup;
+
+ /* verify auto-attach works */
+ skel->links.handle_uretprobe_byname =
+ bpf_program__attach(skel->progs.handle_uretprobe_byname);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
+ goto cleanup;
- if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
- "wrong kprobe res: %d\n", skel->bss->kprobe_res))
+ /* test attach by name for a library function, using the library
+ * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
+ */
+ uprobe_opts.func_name = "malloc";
+ uprobe_opts.retprobe = false;
+ skel->links.handle_uprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2,
+ 0 /* this pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
goto cleanup;
- if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
- "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
+
+ uprobe_opts.func_name = "free";
+ uprobe_opts.retprobe = true;
+ skel->links.handle_uretprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2,
+ -1 /* any pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
goto cleanup;
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ mem = malloc(1);
+ free(mem);
+
/* trigger & validate uprobe & uretprobe */
trigger_func();
- if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
- "wrong uprobe res: %d\n", skel->bss->uprobe_res))
- goto cleanup;
- if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
- "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
- goto cleanup;
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+ ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
cleanup:
test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 5142a7d130b2..2c403ddc8076 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -1192,8 +1192,6 @@ static void str_strip_first_line(char *str)
*dst = '\0';
}
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
static void test_task_vma(void)
{
int err, iter_fd = -1, proc_maps_fd = -1;
@@ -1229,7 +1227,7 @@ static void test_task_vma(void)
len = 0;
while (len < CMP_BUFFER_SIZE) {
err = read_fd_into_buffer(iter_fd, task_vma_output + len,
- min(read_size, CMP_BUFFER_SIZE - len));
+ MIN(read_size, CMP_BUFFER_SIZE - len));
if (!err)
break;
if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index d43f548c572c..a4d0cc9d3367 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -36,13 +36,13 @@ struct test_config {
void (*bpf_destroy)(void *);
};
-enum test_state {
+enum bpf_test_state {
_TS_INVALID,
TS_MODULE_LOAD,
TS_MODULE_LOAD_FAIL,
};
-static _Atomic enum test_state state = _TS_INVALID;
+static _Atomic enum bpf_test_state state = _TS_INVALID;
static int sys_finit_module(int fd, const char *param_values, int flags)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8f7a1cef7d87..e9a9a31b2ffe 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -10,8 +10,6 @@
#include "bpf_tcp_nogpl.skel.h"
#include "bpf_dctcp_release.skel.h"
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
@@ -53,7 +51,7 @@ static void *server(void *arg)
while (bytes < total_bytes && !READ_ONCE(stop)) {
nr_sent = send(fd, &batch,
- min(total_bytes - bytes, sizeof(batch)), 0);
+ MIN(total_bytes - bytes, sizeof(batch)), 0);
if (nr_sent == -1 && errno == EINTR)
continue;
if (nr_sent == -1) {
@@ -146,7 +144,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
/* recv total_bytes */
while (bytes < total_bytes && !READ_ONCE(stop)) {
nr_recv = recv(fd, &batch,
- min(total_bytes - bytes, sizeof(batch)), 0);
+ MIN(total_bytes - bytes, sizeof(batch)), 0);
if (nr_recv == -1 && errno == EINTR)
continue;
if (nr_recv == -1)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ec823561b912..ba5bde53d418 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -8,7 +8,6 @@
#include <linux/filter.h>
#include <linux/unistd.h>
#include <bpf/bpf.h>
-#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <string.h>
@@ -3974,6 +3973,105 @@ static struct btf_raw_test raw_tests[] = {
.value_type_id = 1,
.max_entries = 1,
},
+{
+ .descr = "type_tag test #2, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_CONST_ENC(3), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #3, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #4, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #5, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(1), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 2), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "type_tag test #6, type tag order",
+ .raw_types = {
+ BTF_PTR_ENC(2), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */
+ BTF_PTR_ENC(6), /* [5] */
+ BTF_CONST_ENC(2), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
}; /* struct btf_raw_test raw_tests[] */
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 3ee2107bbf7a..fe1f0f26ea14 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -53,7 +53,7 @@ void test_fexit_stress(void)
&trace_opts);
if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
goto out;
- link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
+ link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL);
if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
goto out;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 044df13ee069..754e80937e5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -4,6 +4,7 @@
#include <network_helpers.h>
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
+#include "for_each_map_elem_write_key.skel.h"
static unsigned int duration;
@@ -129,10 +130,21 @@ out:
for_each_array_map_elem__destroy(skel);
}
+static void test_write_map_key(void)
+{
+ struct for_each_map_elem_write_key *skel;
+
+ skel = for_each_map_elem_write_key__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load"))
+ for_each_map_elem_write_key__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
test_hash_map();
if (test__start_subtest("array_map"))
test_array_map();
+ if (test__start_subtest("write_map_key"))
+ test_write_map_key();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
index e1de5f80c3b2..0354f9b82c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
+++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
@@ -6,11 +6,10 @@
void test_helper_restricted(void)
{
int prog_i = 0, prog_cnt;
- int duration = 0;
do {
struct test_helper_restricted *test;
- int maybeOK;
+ int err;
test = test_helper_restricted__open();
if (!ASSERT_OK_PTR(test, "open"))
@@ -21,12 +20,11 @@ void test_helper_restricted(void)
for (int j = 0; j < prog_cnt; ++j) {
struct bpf_program *prog = *test->skeleton->progs[j].prog;
- maybeOK = bpf_program__set_autoload(prog, prog_i == j);
- ASSERT_OK(maybeOK, "set autoload");
+ bpf_program__set_autoload(prog, true);
}
- maybeOK = test_helper_restricted__load(test);
- CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted");
+ err = test_helper_restricted__load(test);
+ ASSERT_ERR(err, "load_should_fail");
test_helper_restricted__destroy(test);
} while (++prog_i < prog_cnt);
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index f6933b06daf8..1d7a2f1e0731 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -138,12 +138,16 @@ cleanup:
test_ksyms_weak_lskel__destroy(skel);
}
-static void test_write_check(void)
+static void test_write_check(bool test_handler1)
{
struct test_ksyms_btf_write_check *skel;
- skel = test_ksyms_btf_write_check__open_and_load();
- ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
+ skel = test_ksyms_btf_write_check__open();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open"))
+ return;
+ bpf_program__set_autoload(test_handler1 ? skel->progs.handler2 : skel->progs.handler1, false);
+ ASSERT_ERR(test_ksyms_btf_write_check__load(skel),
+ "unexpected load of a prog writing to ksym memory\n");
test_ksyms_btf_write_check__destroy(skel);
}
@@ -179,6 +183,9 @@ void test_ksyms_btf(void)
if (test__start_subtest("weak_ksyms_lskel"))
test_weak_syms_lskel();
- if (test__start_subtest("write_check"))
- test_write_check();
+ if (test__start_subtest("write_check1"))
+ test_write_check(true);
+
+ if (test__start_subtest("write_check2"))
+ test_write_check(false);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
index e9916f2817ec..cad664546912 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -14,6 +14,12 @@ void test_linked_funcs(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ /* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and
+ * are set to not autoload by default
+ */
+ bpf_program__set_autoload(skel->progs.handler1, true);
+ bpf_program__set_autoload(skel->progs.handler2, true);
+
skel->rodata->my_tid = syscall(SYS_gettid);
skel->bss->syscall_id = SYS_getpgid;
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
new file mode 100644
index 000000000000..be3a956cb3a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_fixup.skel.h"
+
+enum trunc_type {
+ TRUNC_NONE,
+ TRUNC_PARTIAL,
+ TRUNC_FULL,
+};
+
+static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.bad_relo, true);
+ memset(log_buf, 0, sizeof(log_buf));
+ bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ "0: <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation <byte_sz> ",
+ "log_buf_part1");
+
+ switch (trunc_type) {
+ case TRUNC_NONE:
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+ "log_buf_part2");
+ ASSERT_HAS_SUBSTR(log_buf,
+ "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+ "log_buf_end");
+ break;
+ case TRUNC_PARTIAL:
+ /* we should get full libbpf message patch */
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+ "log_buf_part2");
+ /* we shouldn't get full end of BPF verifier log */
+ ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+ "log_buf_end");
+ break;
+ case TRUNC_FULL:
+ /* we shouldn't get second part of libbpf message patch */
+ ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"),
+ "log_buf_part2");
+ /* we shouldn't get full end of BPF verifier log */
+ ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+ "log_buf_end");
+ break;
+ }
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+static void bad_core_relo_subprog(void)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.bad_relo_subprog, true);
+ bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ /* there should be no prog loading log because we specified per-prog log buf */
+ ASSERT_HAS_SUBSTR(log_buf,
+ ": <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation <byte_off> ",
+ "log_buf");
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n",
+ "log_buf");
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+void test_log_fixup(void)
+{
+ if (test__start_subtest("bad_core_relo_trunc_none"))
+ bad_core_relo(0, TRUNC_NONE /* full buf */);
+ if (test__start_subtest("bad_core_relo_trunc_partial"))
+ bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
+ if (test__start_subtest("bad_core_relo_trunc_full"))
+ bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */);
+ if (test__start_subtest("bad_core_relo_subprog"))
+ bad_core_relo_subprog();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
new file mode 100644
index 000000000000..9e2fbda64a65
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "map_kptr.skel.h"
+
+void test_map_kptr(void)
+{
+ struct map_kptr *skel;
+ int key = 0, ret;
+ char buf[24];
+
+ skel = map_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+ return;
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
+ ASSERT_OK(ret, "array_map update");
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.array_map), &key, buf, 0);
+ ASSERT_OK(ret, "array_map update2");
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_map), &key, buf, 0);
+ ASSERT_OK(ret, "hash_map update");
+ ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_map), &key);
+ ASSERT_OK(ret, "hash_map delete");
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key, buf, 0);
+ ASSERT_OK(ret, "hash_malloc_map update");
+ ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.hash_malloc_map), &key);
+ ASSERT_OK(ret, "hash_malloc_map delete");
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.lru_hash_map), &key, buf, 0);
+ ASSERT_OK(ret, "lru_hash_map update");
+ ret = bpf_map_delete_elem(bpf_map__fd(skel->maps.lru_hash_map), &key);
+ ASSERT_OK(ret, "lru_hash_map delete");
+
+ map_kptr__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
index 954964f0ac3d..d3915c58d0e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/netcnt.c
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -25,7 +25,7 @@ void serial_test_netcnt(void)
if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
return;
- nproc = get_nprocs_conf();
+ nproc = bpf_num_possible_cpus();
percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
new file mode 100644
index 000000000000..14f2796076e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void clear_test_state(struct test_state *state)
+{
+ state->error_cnt = 0;
+ state->sub_succ_cnt = 0;
+ state->skip_cnt = 0;
+}
+
+void test_prog_tests_framework(void)
+{
+ struct test_state *state = env.test_state;
+
+ /* in all the ASSERT calls below we need to return on the first
+ * error due to the fact that we are cleaning the test state after
+ * each dummy subtest
+ */
+
+ /* test we properly count skipped tests with subtests */
+ if (test__start_subtest("test_good_subtest"))
+ test__end_subtest();
+ if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->error_cnt, 0, "error_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 1, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+
+ if (test__start_subtest("test_skip_subtest")) {
+ test__skip();
+ test__end_subtest();
+ }
+ if (test__start_subtest("test_skip_subtest")) {
+ test__skip();
+ test__end_subtest();
+ }
+ if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 3, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+
+ if (test__start_subtest("test_fail_subtest")) {
+ test__fail();
+ test__end_subtest();
+ }
+ if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 4, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 873323fb18ba..739d2ea6ca55 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -1,21 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-static void toggle_object_autoload_progs(const struct bpf_object *obj,
- const char *name_load)
-{
- struct bpf_program *prog;
-
- bpf_object__for_each_program(prog, obj) {
- const char *name = bpf_program__name(prog);
-
- if (!strcmp(name_load, name))
- bpf_program__set_autoload(prog, true);
- else
- bpf_program__set_autoload(prog, false);
- }
-}
-
void test_reference_tracking(void)
{
const char *file = "test_sk_lookup_kern.o";
@@ -39,6 +24,7 @@ void test_reference_tracking(void)
goto cleanup;
bpf_object__for_each_program(prog, obj_iter) {
+ struct bpf_program *p;
const char *name;
name = bpf_program__name(prog);
@@ -49,7 +35,12 @@ void test_reference_tracking(void)
if (!ASSERT_OK_PTR(obj, "obj_open_file"))
goto cleanup;
- toggle_object_autoload_progs(obj, name);
+ /* all programs are not loaded by default, so just set
+ * autoload to true for the single prog under test
+ */
+ p = bpf_object__find_program_by_name(obj, name);
+ bpf_program__set_autoload(p, true);
+
/* Expect verifier failure if test name has 'err' */
if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
libbpf_print_fn_t old_print_fn;
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
new file mode 100644
index 000000000000..d7f83c0a40a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_load_bytes.skel.h"
+
+void test_skb_load_bytes(void)
+{
+ struct skb_load_bytes *skel;
+ int err, prog_fd, test_result;
+ struct __sk_buff skb = { 0 };
+
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ skel = skb_load_bytes__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.skb_process);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto out;
+
+ skel->bss->load_offset = (uint32_t)(-1);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ test_result = skel->bss->test_result;
+ if (!ASSERT_EQ(test_result, -EFAULT, "offset -1"))
+ goto out;
+
+ skel->bss->load_offset = (uint32_t)10;
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ test_result = skel->bss->test_result;
+ if (!ASSERT_EQ(test_result, 0, "offset 10"))
+ goto out;
+
+out:
+ skb_load_bytes__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 394ebfc3bbf3..4be6fdb78c6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -83,8 +83,6 @@ cleanup:
test_snprintf__destroy(skel);
}
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
static int load_single_snprintf(char *fmt)
{
@@ -95,7 +93,7 @@ static int load_single_snprintf(char *fmt)
if (!skel)
return -EINVAL;
- memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
+ memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10));
ret = test_snprintf_single__load(skel);
test_snprintf_single__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 7ad66a247c02..958dae769c52 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -949,7 +949,6 @@ fail:
return -1;
}
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
enum {
SRC_TO_TARGET = 0,
TARGET_TO_SRC = 1,
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 509e21d5cb9d..b90ee47d3111 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -81,6 +81,7 @@ void test_test_global_funcs(void)
{ "test_global_func14.o", "reference type('FWD S') size cannot be determined" },
{ "test_global_func15.o", "At program exit the register R0 has value" },
{ "test_global_func16.o", "invalid indirect read from stack" },
+ { "test_global_func17.o", "Caller passes invalid args into func#1" },
};
libbpf_print_fn_t old_print_fn = NULL;
int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
index b57a3009465f..7ddd6615b7e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
@@ -44,16 +44,12 @@ static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name,
static void test_strncmp_ret(void)
{
struct strncmp_test *skel;
- struct bpf_program *prog;
int err, got;
skel = strncmp_test__open();
if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
return;
- bpf_object__for_each_program(prog, skel->obj)
- bpf_program__set_autoload(prog, false);
-
bpf_program__set_autoload(skel->progs.do_strncmp, true);
err = strncmp_test__load(skel);
@@ -91,18 +87,13 @@ out:
static void test_strncmp_bad_not_const_str_size(void)
{
struct strncmp_test *skel;
- struct bpf_program *prog;
int err;
skel = strncmp_test__open();
if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
return;
- bpf_object__for_each_program(prog, skel->obj)
- bpf_program__set_autoload(prog, false);
-
- bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size,
- true);
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true);
err = strncmp_test__load(skel);
ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size");
@@ -113,18 +104,13 @@ static void test_strncmp_bad_not_const_str_size(void)
static void test_strncmp_bad_writable_target(void)
{
struct strncmp_test *skel;
- struct bpf_program *prog;
int err;
skel = strncmp_test__open();
if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
return;
- bpf_object__for_each_program(prog, skel->obj)
- bpf_program__set_autoload(prog, false);
-
- bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target,
- true);
+ bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true);
err = strncmp_test__load(skel);
ASSERT_ERR(err, "strncmp_test load bad_writable_target");
@@ -135,18 +121,13 @@ static void test_strncmp_bad_writable_target(void)
static void test_strncmp_bad_not_null_term_target(void)
{
struct strncmp_test *skel;
- struct bpf_program *prog;
int err;
skel = strncmp_test__open();
if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
return;
- bpf_object__for_each_program(prog, skel->obj)
- bpf_program__set_autoload(prog, false);
-
- bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target,
- true);
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true);
err = strncmp_test__load(skel);
ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target");
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
new file mode 100644
index 000000000000..35b87c7ba5be
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include "test_uprobe_autoattach.skel.h"
+
+/* uprobe attach point */
+static noinline int autoattach_trigger_func(int arg)
+{
+ asm volatile ("");
+ return arg + 1;
+}
+
+void test_uprobe_autoattach(void)
+{
+ struct test_uprobe_autoattach *skel;
+ int trigger_val = 100, trigger_ret;
+ size_t malloc_sz = 1;
+ char *mem;
+
+ skel = test_uprobe_autoattach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate uprobe & uretprobe */
+ trigger_ret = autoattach_trigger_func(trigger_val);
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ mem = malloc(malloc_sz);
+
+ ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
+
+ free(mem);
+cleanup:
+ test_uprobe_autoattach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
new file mode 100644
index 000000000000..a71f51bdc08d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -0,0 +1,421 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "../sdt.h"
+
+#include "test_usdt.skel.h"
+#include "test_urandom_usdt.skel.h"
+
+int lets_test_this(int);
+
+static volatile int idx = 2;
+static volatile __u64 bla = 0xFEDCBA9876543210ULL;
+static volatile short nums[] = {-1, -2, -3, };
+
+static volatile struct {
+ int x;
+ signed char y;
+} t1 = { 1, -127 };
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short test_usdt0_semaphore SEC(".probes");
+unsigned short test_usdt3_semaphore SEC(".probes");
+unsigned short test_usdt12_semaphore SEC(".probes");
+
+static void __always_inline trigger_func(int x) {
+ long y = 42;
+
+ if (test_usdt0_semaphore)
+ STAP_PROBE(test, usdt0);
+ if (test_usdt3_semaphore)
+ STAP_PROBE3(test, usdt3, x, y, &bla);
+ if (test_usdt12_semaphore) {
+ STAP_PROBE12(test, usdt12,
+ x, x + 1, y, x + y, 5,
+ y / 7, bla, &bla, -9, nums[x],
+ nums[idx], t1.y);
+ }
+}
+
+static void subtest_basic_usdt(void)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* usdt0 won't be auto-attached */
+ opts.usdt_cookie = 0xcafedeadbeeffeed;
+ skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt0", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
+ goto cleanup;
+
+ trigger_func(1);
+
+ ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+
+ ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
+ ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+
+ /* auto-attached usdt3 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+ /* auto-attached usdt12 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
+ ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt");
+
+ ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5");
+ ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6");
+ ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7");
+ ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8");
+ ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9");
+ ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10");
+ ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
+ ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+
+ /* trigger_func() is marked __always_inline, so USDT invocations will be
+ * inlined in two different places, meaning that each USDT will have
+ * at least 2 different places to be attached to. This verifies that
+ * bpf_program__attach_usdt() handles this properly and attaches to
+ * all possible places of USDT invocation.
+ */
+ trigger_func(2);
+
+ ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+
+ /* only check values that depend on trigger_func()'s input value */
+ ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
+
+ ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10");
+
+ /* detach and re-attach usdt3 */
+ bpf_link__destroy(skel->links.usdt3);
+
+ opts.usdt_cookie = 0xBADC00C51E;
+ skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */,
+ "/proc/self/exe", "test", "usdt3", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
+ goto cleanup;
+
+ trigger_func(3);
+
+ ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+ /* this time usdt3 has custom cookie */
+ ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
+unsigned short test_usdt_100_semaphore SEC(".probes");
+unsigned short test_usdt_300_semaphore SEC(".probes");
+unsigned short test_usdt_400_semaphore SEC(".probes");
+
+#define R10(F, X) F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \
+ F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
+#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \
+ R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90);
+
+/* carefully control that we get exactly 100 inlines by preventing inlining */
+static void __always_inline f100(int x)
+{
+ STAP_PROBE1(test, usdt_100, x);
+}
+
+__weak void trigger_100_usdts(void)
+{
+ R100(f100, 0);
+}
+
+/* we shouldn't be able to attach to test:usdt2_300 USDT as we don't have as
+ * many slots for specs. It's important that each STAP_PROBE2() invocation
+ * (after untolling) gets different arg spec due to compiler inlining i as
+ * a constant
+ */
+static void __always_inline f300(int x)
+{
+ STAP_PROBE1(test, usdt_300, x);
+}
+
+__weak void trigger_300_usdts(void)
+{
+ R100(f300, 0);
+ R100(f300, 100);
+ R100(f300, 200);
+}
+
+static void __always_inline f400(int x __attribute__((unused)))
+{
+ static int y;
+
+ STAP_PROBE1(test, usdt_400, y++);
+}
+
+/* this time we have 400 different USDT call sites, but they have uniform
+ * argument location, so libbpf's spec string deduplication logic should keep
+ * spec count use very small and so we should be able to attach to all 400
+ * call sites
+ */
+__weak void trigger_400_usdts(void)
+{
+ R100(f400, 0);
+ R100(f400, 100);
+ R100(f400, 200);
+ R100(f400, 300);
+}
+
+static void subtest_multispec_usdt(void)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err, i;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* usdt_100 is auto-attached and there are 100 inlined call sites,
+ * let's validate that all of them are properly attached to and
+ * handled from BPF side
+ */
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+
+ /* Stress test free spec ID tracking. By default libbpf allows up to
+ * 256 specs to be used, so if we don't return free spec IDs back
+ * after few detachments and re-attachments we should run out of
+ * available spec IDs.
+ */
+ for (i = 0; i < 2; i++) {
+ bpf_link__destroy(skel->links.usdt_100);
+
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_100", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach"))
+ goto cleanup;
+
+ bss->usdt_100_sum = 0;
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+ }
+
+ /* Now let's step it up and try to attach USDT that requires more than
+ * 256 attach points with different specs for each.
+ * Note that we need trigger_300_usdts() only to actually have 300
+ * USDT call sites, we are not going to actually trace them.
+ */
+ trigger_300_usdts();
+
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
+ bpf_link__destroy(skel->links.usdt_100);
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
+ "test", "usdt_300", NULL);
+ err = -errno;
+ if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach"))
+ goto cleanup;
+ ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err");
+
+ /* let's check that there are no "dangling" BPF programs attached due
+ * to partial success of the above test:usdt_300 attachment
+ */
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+ f300(777); /* this is 301st instance of usdt_300 */
+
+ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
+ ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+
+ /* This time we have USDT with 400 inlined invocations, but arg specs
+ * should be the same across all sites, so libbpf will only need to
+ * use one spec and thus we'll be able to attach 400 uprobes
+ * successfully.
+ *
+ * Again, we are reusing usdt_100 BPF program.
+ */
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_400", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach"))
+ goto cleanup;
+
+ trigger_400_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
+ ASSERT_EQ(bss->usdt_100_sum, 399 * 400 / 2, "usdt_400_sum");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
+static FILE *urand_spawn(int *pid)
+{
+ FILE *f;
+
+ /* urandom_read's stdout is wired into f */
+ f = popen("./urandom_read 1 report-pid", "r");
+ if (!f)
+ return NULL;
+
+ if (fscanf(f, "%d", pid) != 1) {
+ pclose(f);
+ return NULL;
+ }
+
+ return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+ int exit_code;
+
+ /* pclose() waits for child process to exit and returns their exit code */
+ exit_code = pclose(*urand_pipe);
+ *urand_pipe = NULL;
+
+ return exit_code;
+}
+
+static void subtest_urandom_usdt(bool auto_attach)
+{
+ struct test_urandom_usdt *skel;
+ struct test_urandom_usdt__bss *bss;
+ struct bpf_link *l;
+ FILE *urand_pipe = NULL;
+ int err, urand_pid = 0;
+
+ skel = test_urandom_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ urand_pipe = urand_spawn(&urand_pid);
+ if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->urand_pid = urand_pid;
+
+ if (auto_attach) {
+ err = test_urandom_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_auto_attach"))
+ goto cleanup;
+ } else {
+ l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_without_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_with_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_with_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_with_sema = l;
+
+ }
+
+ /* trigger urandom_read USDTs */
+ ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+ ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt");
+ ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum");
+
+ ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt");
+ ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum");
+
+cleanup:
+ if (urand_pipe)
+ pclose(urand_pipe);
+ test_urandom_usdt__destroy(skel);
+}
+
+void test_usdt(void)
+{
+ if (test__start_subtest("basic"))
+ subtest_basic_usdt();
+ if (test__start_subtest("multispec"))
+ subtest_multispec_usdt();
+ if (test__start_subtest("urand_auto_attach"))
+ subtest_urandom_usdt(true /* auto_attach */);
+ if (test__start_subtest("urand_pid_attach"))
+ subtest_urandom_usdt(false /* auto_attach */);
+}
diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c
index f5ca142abf8f..dd9b30a0f0fc 100644
--- a/tools/testing/selftests/bpf/progs/exhandler_kern.c
+++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c
@@ -7,6 +7,8 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+
char _license[] SEC("license") = "GPL";
unsigned int exception_triggered;
@@ -37,7 +39,16 @@ int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags)
*/
work = task->task_works;
func = work->func;
- if (!work && !func)
- exception_triggered++;
+ /* Currently verifier will fail for `btf_ptr |= btf_ptr` * instruction.
+ * To workaround the issue, use barrier_var() and rewrite as below to
+ * prevent compiler from generating verifier-unfriendly code.
+ */
+ barrier_var(work);
+ if (work)
+ return 0;
+ barrier_var(func);
+ if (func)
+ return 0;
+ exception_triggered++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
new file mode 100644
index 000000000000..8e545865ea33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array_map SEC(".maps");
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ void *data)
+{
+ bpf_get_current_comm(key, sizeof(*key));
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int test_map_key_write(const void *ctx)
+{
+ bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
index b964ec1390c2..b05571bc67d5 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs1.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
/* weak and shared between two files */
const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx1(__u64 *ctx)
/* this weak instance should win because it's the first one */
__weak int set_output_weak(int x)
{
+ static volatile int whatever;
+
+ /* make sure we use CO-RE relocations in a weak function, this used to
+ * cause problems for BPF static linker
+ */
+ whatever = bpf_core_type_size(struct task_struct);
+
output_weak1 = x;
return x;
}
@@ -53,12 +61,17 @@ extern int set_output_val2(int x);
/* here we'll force set_output_ctx2() to be __hidden in the final obj file */
__hidden extern void set_output_ctx2(__u64 *ctx);
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
int BPF_PROG(handler1, struct pt_regs *regs, long id)
{
+ static volatile int whatever;
+
if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
return 0;
+ /* make sure we have CO-RE relocations in main program */
+ whatever = bpf_core_type_size(struct task_struct);
+
set_output_val2(1000);
set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
index 575e958e60b7..ee7e3848ee4f 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs2.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
/* weak and shared between both files */
const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx2(__u64 *ctx)
/* this weak instance should lose, because it will be processed second */
__weak int set_output_weak(int x)
{
+ static volatile int whatever;
+
+ /* make sure we use CO-RE relocations in a weak function, this used to
+ * cause problems for BPF static linker
+ */
+ whatever = 2 * bpf_core_type_size(struct task_struct);
+
output_weak2 = x;
return 2 * x;
}
@@ -53,12 +61,17 @@ extern int set_output_val1(int x);
/* here we'll force set_output_ctx1() to be __hidden in the final obj file */
__hidden extern void set_output_ctx1(__u64 *ctx);
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
int BPF_PROG(handler2, struct pt_regs *regs, long id)
{
+ static volatile int whatever;
+
if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
return 0;
+ /* make sure we have CO-RE relocations in main program */
+ whatever = bpf_core_type_size(struct task_struct);
+
set_output_val1(2000);
set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
new file mode 100644
index 000000000000..1b0e0409eaa5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_value {
+ struct prog_test_ref_kfunc __kptr *unref_ptr;
+ struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} hash_map SEC(".maps");
+
+struct hash_malloc_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_malloc_map SEC(".maps");
+
+struct lru_hash_map {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} lru_hash_map SEC(".maps");
+
+#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \
+ struct { \
+ __uint(type, map_type); \
+ __uint(max_entries, 1); \
+ __uint(key_size, sizeof(int)); \
+ __uint(value_size, sizeof(int)); \
+ __array(values, struct inner_map_type); \
+ } name SEC(".maps") = { \
+ .values = { [0] = &inner_map_type }, \
+ }
+
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
+
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+
+static void test_kptr_unref(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p;
+
+ p = v->unref_ptr;
+ /* store untrusted_ptr_or_null_ */
+ v->unref_ptr = p;
+ if (!p)
+ return;
+ if (p->a + p->b > 100)
+ return;
+ /* store untrusted_ptr_ */
+ v->unref_ptr = p;
+ /* store NULL */
+ v->unref_ptr = NULL;
+}
+
+static void test_kptr_ref(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p;
+
+ p = v->ref_ptr;
+ /* store ptr_or_null_ */
+ v->unref_ptr = p;
+ if (!p)
+ return;
+ if (p->a + p->b > 100)
+ return;
+ /* store NULL */
+ p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (!p)
+ return;
+ if (p->a + p->b > 100) {
+ bpf_kfunc_call_test_release(p);
+ return;
+ }
+ /* store ptr_ */
+ v->unref_ptr = p;
+ bpf_kfunc_call_test_release(p);
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return;
+ /* store ptr_ */
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (!p)
+ return;
+ if (p->a + p->b > 100) {
+ bpf_kfunc_call_test_release(p);
+ return;
+ }
+ bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr_get(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p;
+
+ p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
+ if (!p)
+ return;
+ if (p->a + p->b > 100) {
+ bpf_kfunc_call_test_release(p);
+ return;
+ }
+ bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr(struct map_value *v)
+{
+ test_kptr_unref(v);
+ test_kptr_ref(v);
+ test_kptr_get(v);
+}
+
+SEC("tc")
+int test_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int i, key = 0;
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return 0; \
+ test_kptr(v)
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+#undef TEST
+ return 0;
+}
+
+SEC("tc")
+int test_map_in_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int i, key = 0;
+ void *map;
+
+#define TEST(map_in_map) \
+ map = bpf_map_lookup_elem(&map_in_map, &key); \
+ if (!map) \
+ return 0; \
+ v = bpf_map_lookup_elem(map, &key); \
+ if (!v) \
+ return 0; \
+ test_kptr(v)
+
+ TEST(array_of_array_maps);
+ TEST(array_of_hash_maps);
+ TEST(array_of_hash_malloc_maps);
+ TEST(array_of_lru_hash_maps);
+ TEST(hash_of_array_maps);
+ TEST(hash_of_hash_maps);
+ TEST(hash_of_hash_malloc_maps);
+ TEST(hash_of_lru_hash_maps);
+
+#undef TEST
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
index b3fcb5274ee0..f793280a3238 100644
--- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -35,10 +35,10 @@ int oncpu(void *ctx)
long val;
val = bpf_get_stackid(ctx, &stackmap, 0);
- if (val > 0)
+ if (val >= 0)
stackid_kernel = 2;
val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
- if (val > 0)
+ if (val >= 0)
stackid_user = 2;
trace = bpf_map_lookup_elem(&stackdata_map, &key);
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 4896fdf816f7..92331053dba3 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -826,8 +826,9 @@ out:
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
- struct dentry* old_dentry, struct inode* dir,
- struct dentry* new_dentry, struct inode** delegated_inode)
+ struct dentry* old_dentry, struct user_namespace *mnt_userns,
+ struct inode* dir, struct dentry* new_dentry,
+ struct inode** delegated_inode)
{
struct bpf_func_stats_ctx stats_ctx;
bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 1ed28882daf3..5d3dc4d66d47 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -299,7 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
+#ifdef UNROLL_COUNT
+#pragma clang loop unroll_count(UNROLL_COUNT)
+#else
#pragma clang loop unroll(full)
+#endif
#endif /* NO_UNROLL */
/* Unwind python stack */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
index cb49b89e37cd..ce1aa5189cc4 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -1,9 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
-/* clang will not unroll the loop 600 times.
- * Instead it will unroll it to the amount it deemed
- * appropriate, but the loop will still execute 600 times.
- * Total program size is around 90k insns
+/* Full unroll of 600 iterations will have total
+ * program size close to 298k insns and this may
+ * cause BPF_JMP insn out of 16-bit integer range.
+ * So limit the unroll size to 150 so the
+ * total program size is around 80k insns but
+ * the loop will still execute 600 times.
*/
+#define UNROLL_COUNT 150
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/skb_load_bytes.c b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
new file mode 100644
index 000000000000..e4252fd973be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 load_offset = 0;
+int test_result = 0;
+
+SEC("tc")
+int skb_process(struct __sk_buff *skb)
+{
+ char buf[16];
+
+ test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c
index 900d930d48a8..769668feed48 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_test.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_test.c
@@ -19,7 +19,7 @@ unsigned int no_const_str_size = STRNCMP_STR_SZ;
char _license[] SEC("license") = "GPL";
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int do_strncmp(void *ctx)
{
if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
@@ -29,7 +29,7 @@ int do_strncmp(void *ctx)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int strncmp_bad_not_const_str_size(void *ctx)
{
/* The value of string size is not const, so will fail */
@@ -37,7 +37,7 @@ int strncmp_bad_not_const_str_size(void *ctx)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int strncmp_bad_writable_target(void *ctx)
{
/* Compared target is not read-only, so will fail */
@@ -45,7 +45,7 @@ int strncmp_bad_writable_target(void *ctx)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int strncmp_bad_not_null_term_target(void *ctx)
{
/* Compared target is not null-terminated, so will fail */
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 8056a4c6d918..af994d16bb10 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -10,6 +10,10 @@ int kprobe_res = 0;
int kretprobe_res = 0;
int uprobe_res = 0;
int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+int uretprobe_byname_res = 0;
+int uprobe_byname2_res = 0;
+int uretprobe_byname2_res = 0;
SEC("kprobe/sys_nanosleep")
int handle_kprobe(struct pt_regs *ctx)
@@ -25,18 +29,51 @@ int BPF_KRETPROBE(handle_kretprobe)
return 0;
}
-SEC("uprobe/trigger_func")
+SEC("uprobe")
int handle_uprobe(struct pt_regs *ctx)
{
uprobe_res = 3;
return 0;
}
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
int handle_uretprobe(struct pt_regs *ctx)
{
uretprobe_res = 4;
return 0;
}
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_res = 5;
+ return 0;
+}
+
+/* use auto-attach format for section definition. */
+SEC("uretprobe//proc/self/exe:trigger_func2")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+ uretprobe_byname_res = 6;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+ unsigned int size = PT_REGS_PARM1(ctx);
+
+ /* verify malloc size */
+ if (size == 1)
+ uprobe_byname2_res = 7;
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+ uretprobe_byname2_res = 8;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 2d3a7710e2ce..0e2222968918 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -37,14 +37,14 @@ int handle_kretprobe(struct pt_regs *ctx)
return 0;
}
-SEC("uprobe/trigger_func")
+SEC("uprobe")
int handle_uprobe(struct pt_regs *ctx)
{
update(ctx, &uprobe_res);
return 0;
}
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
int handle_uretprobe(struct pt_regs *ctx)
{
update(ctx, &uretprobe_res);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c
new file mode 100644
index 000000000000..2b8b9b8ba018
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline int foo(int *p)
+{
+ return p ? (*p = 42) : 0;
+}
+
+const volatile int i;
+
+SEC("tc")
+int test_cls(struct __sk_buff *skb)
+{
+ return foo((int *)&i);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
index 68d64c365f90..20ef9d433b97 100644
--- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -56,7 +56,7 @@ static void spin_lock_work(void)
}
}
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
int raw_tp_timer(void *ctx)
{
timer_work();
@@ -64,7 +64,7 @@ int raw_tp_timer(void *ctx)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int tp_timer(void *ctx)
{
timer_work();
@@ -72,7 +72,7 @@ int tp_timer(void *ctx)
return 0;
}
-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
int kprobe_timer(void *ctx)
{
timer_work();
@@ -80,7 +80,7 @@ int kprobe_timer(void *ctx)
return 0;
}
-SEC("perf_event")
+SEC("?perf_event")
int perf_event_timer(void *ctx)
{
timer_work();
@@ -88,7 +88,7 @@ int perf_event_timer(void *ctx)
return 0;
}
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
int raw_tp_spin_lock(void *ctx)
{
spin_lock_work();
@@ -96,7 +96,7 @@ int raw_tp_spin_lock(void *ctx)
return 0;
}
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
int tp_spin_lock(void *ctx)
{
spin_lock_work();
@@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx)
return 0;
}
-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
int kprobe_spin_lock(void *ctx)
{
spin_lock_work();
@@ -112,7 +112,7 @@ int kprobe_spin_lock(void *ctx)
return 0;
}
-SEC("perf_event")
+SEC("?perf_event")
int perf_event_spin_lock(void *ctx)
{
spin_lock_work();
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
index 2180c41cd890..a72a5bf3812a 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -8,7 +8,7 @@
extern const int bpf_prog_active __ksym; /* int type global var. */
SEC("raw_tp/sys_enter")
-int handler(const void *ctx)
+int handler1(const void *ctx)
{
int *active;
__u32 cpu;
@@ -26,4 +26,20 @@ int handler(const void *ctx)
return 0;
}
+__noinline int write_active(int *p)
+{
+ return p ? (*p = 42) : 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ int *active;
+ __u32 cpu;
+
+ active = bpf_this_cpu_ptr(&bpf_prog_active);
+ write_active(active);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 19e4d2071c60..c8bc0c6947aa 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -218,7 +218,7 @@ static __noinline bool get_packet_dst(struct real_definition **real,
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c
new file mode 100644
index 000000000000..a78980d897b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct___bad {
+ int pid;
+ int fake_field;
+ void *fake_field_subprog;
+} __attribute__((preserve_access_index));
+
+SEC("?raw_tp/sys_enter")
+int bad_relo(const void *ctx)
+{
+ static struct task_struct___bad *t;
+
+ return bpf_core_field_size(t->fake_field);
+}
+
+static __noinline int bad_subprog(void)
+{
+ static struct task_struct___bad *t;
+
+ /* ugliness below is a field offset relocation */
+ return (void *)&t->fake_field_subprog - (void *)t;
+}
+
+SEC("?raw_tp/sys_enter")
+int bad_relo_subprog(const void *ctx)
+{
+ static struct task_struct___bad *t;
+
+ return bad_subprog() + bpf_core_field_size(t->pid);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 02f79356d5eb..98c6493d9b91 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -89,7 +89,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -121,7 +120,6 @@ assign:
static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -161,7 +159,7 @@ assign:
SEC("tc")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
- struct bpf_sock_tuple *tuple, ln = {0};
+ struct bpf_sock_tuple *tuple;
bool ipv4 = false;
bool tcp = false;
int tuple_len;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index 40f161480a2f..b502e5c92e33 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -52,7 +52,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
return result;
}
-SEC("tc")
+SEC("?tc")
int sk_lookup_success(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
@@ -78,7 +78,7 @@ int sk_lookup_success(struct __sk_buff *skb)
return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
}
-SEC("tc")
+SEC("?tc")
int sk_lookup_success_simple(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -90,7 +90,7 @@ int sk_lookup_success_simple(struct __sk_buff *skb)
return 0;
}
-SEC("tc")
+SEC("?tc")
int err_use_after_free(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -105,7 +105,7 @@ int err_use_after_free(struct __sk_buff *skb)
return family;
}
-SEC("tc")
+SEC("?tc")
int err_modify_sk_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -120,7 +120,7 @@ int err_modify_sk_pointer(struct __sk_buff *skb)
return 0;
}
-SEC("tc")
+SEC("?tc")
int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -134,7 +134,7 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
return 0;
}
-SEC("tc")
+SEC("?tc")
int err_no_release(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -143,7 +143,7 @@ int err_no_release(struct __sk_buff *skb)
return 0;
}
-SEC("tc")
+SEC("?tc")
int err_release_twice(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -155,7 +155,7 @@ int err_release_twice(struct __sk_buff *skb)
return 0;
}
-SEC("tc")
+SEC("?tc")
int err_release_unchecked(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("tc")
+SEC("?tc")
int err_no_release_subcall(struct __sk_buff *skb)
{
lookup_no_release(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
index e6cb09259408..1926facba122 100644
--- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {};
char ctx_regs[PT_REGS_SIZE] = {};
int uprobe_res = 0;
-SEC("uprobe/trigger_func")
+SEC("uprobe")
int handle_uprobe(struct pt_regs *ctx)
{
struct task_struct *current;
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
new file mode 100644
index 000000000000..ab75522e2eeb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int uprobe_byname_parm1 = 0;
+int uprobe_byname_ran = 0;
+int uretprobe_byname_rc = 0;
+int uretprobe_byname_ran = 0;
+size_t uprobe_byname2_parm1 = 0;
+int uprobe_byname2_ran = 0;
+char *uretprobe_byname2_rc = NULL;
+int uretprobe_byname2_ran = 0;
+
+int test_pid;
+
+/* This program cannot auto-attach, but that should not stop other
+ * programs from attaching.
+ */
+SEC("uprobe")
+int handle_uprobe_noautoattach(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uprobe//proc/self/exe:autoattach_trigger_func")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx);
+ uprobe_byname_ran = 1;
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:autoattach_trigger_func")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+ uretprobe_byname_rc = PT_REGS_RC_CORE(ctx);
+ uretprobe_byname_ran = 2;
+ return 0;
+}
+
+
+SEC("uprobe/libc.so.6:malloc")
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx);
+ uprobe_byname2_ran = 3;
+ return 0;
+}
+
+SEC("uretprobe/libc.so.6:malloc")
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx);
+ uretprobe_byname2_ran = 4;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
new file mode 100644
index 000000000000..3539b02bd5f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int urand_pid;
+
+int urand_read_without_sema_call_cnt;
+int urand_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_without_sema")
+int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urand_read_with_sema_call_cnt;
+int urand_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_with_sema")
+int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_without_sema_call_cnt;
+int urandlib_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_without_sema")
+int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_with_sema_call_cnt;
+int urandlib_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_with_sema")
+int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
new file mode 100644
index 000000000000..505aab9a5234
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int my_pid;
+
+int usdt0_called;
+u64 usdt0_cookie;
+int usdt0_arg_cnt;
+int usdt0_arg_ret;
+
+SEC("usdt")
+int usdt0(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt0_called, 1);
+
+ usdt0_cookie = bpf_usdt_cookie(ctx);
+ usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
+ /* should return -ENOENT for any arg_num */
+ usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ return 0;
+}
+
+int usdt3_called;
+u64 usdt3_cookie;
+int usdt3_arg_cnt;
+int usdt3_arg_rets[3];
+u64 usdt3_args[3];
+
+SEC("usdt//proc/self/exe:test:usdt3")
+int usdt3(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt3_called, 1);
+
+ usdt3_cookie = bpf_usdt_cookie(ctx);
+ usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt3_args[0] = (int)tmp;
+
+ usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
+ usdt3_args[1] = (long)tmp;
+
+ usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
+ usdt3_args[2] = (uintptr_t)tmp;
+
+ return 0;
+}
+
+int usdt12_called;
+u64 usdt12_cookie;
+int usdt12_arg_cnt;
+u64 usdt12_args[12];
+
+SEC("usdt//proc/self/exe:test:usdt12")
+int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
+ long a6, __u64 a7, uintptr_t a8, int a9, short a10,
+ short a11, signed char a12)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt12_called, 1);
+
+ usdt12_cookie = bpf_usdt_cookie(ctx);
+ usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt12_args[0] = a1;
+ usdt12_args[1] = a2;
+ usdt12_args[2] = a3;
+ usdt12_args[3] = a4;
+ usdt12_args[4] = a5;
+ usdt12_args[5] = a6;
+ usdt12_args[6] = a7;
+ usdt12_args[7] = a8;
+ usdt12_args[8] = a9;
+ usdt12_args[9] = a10;
+ usdt12_args[10] = a11;
+ usdt12_args[11] = a12;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
new file mode 100644
index 000000000000..aa6de32b50d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+/* this file is linked together with test_usdt.c to validate that usdt.bpf.h
+ * can be included in multiple .bpf.c files forming single final BPF object
+ * file
+ */
+
+extern int my_pid;
+
+int usdt_100_called;
+int usdt_100_sum;
+
+SEC("usdt//proc/self/exe:test:usdt_100")
+int BPF_USDT(usdt_100, int x)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_100_called, 1);
+ __sync_fetch_and_add(&usdt_100_sum, x);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 596c4e71bf3a..125d872d7981 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -564,22 +564,22 @@ static bool get_packet_dst(struct real_definition **real,
hash = get_packet_hash(pckt, hash_16bytes);
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
key = 2 * vip_info->vip_num + hash % 2;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
- return 0;
+ return false;
key = *real_pos;
*real = bpf_map_lookup_elem(&reals, &key);
if (!(*real))
- return 0;
+ return false;
if (!(vip_info->flags & (1 << 1))) {
__u32 conn_rate_key = 512 + 2;
struct lb_stats *conn_rate_stats =
bpf_map_lookup_elem(&stats, &conn_rate_key);
if (!conn_rate_stats)
- return 1;
+ return true;
cur_time = bpf_ktime_get_ns();
if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
conn_rate_stats->v1 = 1;
@@ -587,14 +587,14 @@ static bool get_packet_dst(struct real_definition **real,
} else {
conn_rate_stats->v1 += 1;
if (conn_rate_stats->v1 >= 1)
- return 1;
+ return true;
}
if (pckt->flow.proto == IPPROTO_UDP)
new_dst_lru.atime = cur_time;
new_dst_lru.pos = key;
bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
}
- return 1;
+ return true;
}
__attribute__ ((noinline))
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2ab049b54d6c..694e7cec1823 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx)
return -22;
}
-SEC("uprobe/self/uprobe_target")
+SEC("uprobe")
int bench_trigger_uprobe(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h
new file mode 100644
index 000000000000..733045a52771
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt-config.h
@@ -0,0 +1,6 @@
+/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure.
+
+ This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to
+ indicate whether the assembler supports "?" in .pushsection directives. */
+
+#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
new file mode 100644
index 000000000000..ca0162b4dc57
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -0,0 +1,513 @@
+/* <sys/sdt.h> - Systemtap static probe definition macros.
+
+ This file is dedicated to the public domain, pursuant to CC0
+ (https://creativecommons.org/publicdomain/zero/1.0/)
+*/
+
+#ifndef _SYS_SDT_H
+#define _SYS_SDT_H 1
+
+/*
+ This file defines a family of macros
+
+ STAP_PROBEn(op1, ..., opn)
+
+ that emit a nop into the instruction stream, and some data into an auxiliary
+ note section. The data in the note section describes the operands, in terms
+ of size and location. Each location is encoded as assembler operand string.
+ Consumer tools such as gdb or systemtap insert breakpoints on top of
+ the nop, and decode the location operand-strings, like an assembler,
+ to find the values being passed.
+
+ The operand strings are selected by the compiler for each operand.
+ They are constrained by gcc inline-assembler codes. The default is:
+
+ #define STAP_SDT_ARG_CONSTRAINT nor
+
+ This is a good default if the operands tend to be integral and
+ moderate in number (smaller than number of registers). In other
+ cases, the compiler may report "'asm' requires impossible reload" or
+ similar. In this case, consider simplifying the macro call (fewer
+ and simpler operands), reduce optimization, or override the default
+ constraints string via:
+
+ #define STAP_SDT_ARG_CONSTRAINT g
+ #include <sys/sdt.h>
+
+ See also:
+ https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ */
+
+
+
+#ifdef __ASSEMBLER__
+# define _SDT_PROBE(provider, name, n, arglist) \
+ _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \
+ _SDT_ASM_BASE
+# define _SDT_ASM_1(x) x;
+# define _SDT_ASM_2(a, b) a,b;
+# define _SDT_ASM_3(a, b, c) a,b,c;
+# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e;
+# define _SDT_ASM_STRING_1(x) .asciz #x;
+# define _SDT_ASM_SUBSTR_1(x) .ascii #x;
+# define _SDT_DEPAREN_0() /* empty */
+# define _SDT_DEPAREN_1(a) a
+# define _SDT_DEPAREN_2(a,b) a b
+# define _SDT_DEPAREN_3(a,b,c) a b c
+# define _SDT_DEPAREN_4(a,b,c,d) a b c d
+# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e
+# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f
+# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g
+# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h
+# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i
+# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j
+# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k
+# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l
+#else
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \
+ __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore));
+#else
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name)
+#endif
+
+# define _SDT_PROBE(provider, name, n, arglist) \
+ do { \
+ _SDT_NOTE_SEMAPHORE_USE(provider, name); \
+ __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \
+ :: _SDT_ASM_OPERANDS_##n arglist); \
+ __asm__ __volatile__ (_SDT_ASM_BASE); \
+ } while (0)
+# define _SDT_S(x) #x
+# define _SDT_ASM_1(x) _SDT_S(x) "\n"
+# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n"
+# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "\n"
+# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "," _SDT_S(d) "," \
+ _SDT_S(e) "\n"
+# define _SDT_ASM_ARGS(n) _SDT_ASM_TEMPLATE_##n
+# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x)
+# define _SDT_ASM_SUBSTR_1(x) _SDT_ASM_1(.ascii #x)
+
+# define _SDT_ARGFMT(no) _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \
+ _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no))
+
+
+# ifndef STAP_SDT_ARG_CONSTRAINT
+# if defined __powerpc__
+# define STAP_SDT_ARG_CONSTRAINT nZr
+# elif defined __arm__
+# define STAP_SDT_ARG_CONSTRAINT g
+# else
+# define STAP_SDT_ARG_CONSTRAINT nor
+# endif
+# endif
+
+# define _SDT_STRINGIFY(x) #x
+# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x)
+/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler
+ macros _SDT_SIZE and _SDT_TYPE */
+# define _SDT_ARG(n, x) \
+ [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \
+ [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x))
+#endif
+#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x)
+#define _SDT_ASM_SUBSTR(x) _SDT_ASM_SUBSTR_1(x)
+
+#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \
+ || __builtin_classify_type (x) == 5)
+
+#ifdef __cplusplus
+# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \
+ && __sdt_type<__typeof (x)>::__sdt_signed)
+# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \
+ ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+
+# include <cstddef>
+
+template<typename __sdt_T>
+struct __sdt_type
+{
+ static const bool __sdt_signed = false;
+};
+
+#define __SDT_ALWAYS_SIGNED(T) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = true; };
+#define __SDT_COND_SIGNED(T,CT) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); };
+__SDT_ALWAYS_SIGNED(signed char)
+__SDT_ALWAYS_SIGNED(short)
+__SDT_ALWAYS_SIGNED(int)
+__SDT_ALWAYS_SIGNED(long)
+__SDT_ALWAYS_SIGNED(long long)
+__SDT_ALWAYS_SIGNED(volatile signed char)
+__SDT_ALWAYS_SIGNED(volatile short)
+__SDT_ALWAYS_SIGNED(volatile int)
+__SDT_ALWAYS_SIGNED(volatile long)
+__SDT_ALWAYS_SIGNED(volatile long long)
+__SDT_ALWAYS_SIGNED(const signed char)
+__SDT_ALWAYS_SIGNED(const short)
+__SDT_ALWAYS_SIGNED(const int)
+__SDT_ALWAYS_SIGNED(const long)
+__SDT_ALWAYS_SIGNED(const long long)
+__SDT_ALWAYS_SIGNED(const volatile signed char)
+__SDT_ALWAYS_SIGNED(const volatile short)
+__SDT_ALWAYS_SIGNED(const volatile int)
+__SDT_ALWAYS_SIGNED(const volatile long)
+__SDT_ALWAYS_SIGNED(const volatile long long)
+__SDT_COND_SIGNED(char, char)
+__SDT_COND_SIGNED(wchar_t, wchar_t)
+__SDT_COND_SIGNED(volatile char, char)
+__SDT_COND_SIGNED(volatile wchar_t, wchar_t)
+__SDT_COND_SIGNED(const char, char)
+__SDT_COND_SIGNED(const wchar_t, wchar_t)
+__SDT_COND_SIGNED(const volatile char, char)
+__SDT_COND_SIGNED(const volatile wchar_t, wchar_t)
+#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+/* __SDT_COND_SIGNED(char16_t) */
+/* __SDT_COND_SIGNED(char32_t) */
+#endif
+
+template<typename __sdt_E>
+struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {};
+
+template<typename __sdt_E, size_t __sdt_N>
+struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {};
+
+#elif !defined(__ASSEMBLER__)
+__extension__ extern unsigned long long __sdt_unsp;
+# define _SDT_ARGINTTYPE(x) \
+ __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \
+ + 3) & -4) == 4, (x), 0U))
+# define _SDT_ARGSIGNED(x) \
+ (!__extension__ \
+ (__builtin_constant_p ((((unsigned long long) \
+ (_SDT_ARGINTTYPE (x)) __sdt_unsp) \
+ & ((unsigned long long)1 << (sizeof (unsigned long long) \
+ * __CHAR_BIT__ - 1))) == 0) \
+ || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0))
+# define _SDT_ARGSIZE(x) \
+ (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+#endif
+
+#if defined __powerpc__ || defined __powerpc64__
+# define _SDT_ARGTMPL(id) %I[id]%[id]
+#elif defined __i386__
+# define _SDT_ARGTMPL(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+# define _SDT_ARGTMPL(id) %[id]
+#endif
+
+/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ operand note format.
+
+ The named register may be a longer or shorter (!) alias for the
+ storage where the value in question is found. For example, on
+ i386, 64-bit value may be put in register pairs, and the register
+ name stored would identify just one of them. Previously, gcc was
+ asked to emit the %w[id] (16-bit alias of some registers holding
+ operands), even when a wider 32-bit value was used.
+
+ Bottom line: the byte-width given before the @ sign governs. If
+ there is a mismatch between that width and that of the named
+ register, then a sys/sdt.h note consumer may need to employ
+ architecture-specific heuristics to figure out where the compiler
+ has actually put the complete value.
+*/
+
+#ifdef __LP64__
+# define _SDT_ASM_ADDR .8byte
+#else
+# define _SDT_ASM_ADDR .4byte
+#endif
+
+/* The ia64 and s390 nop instructions take an argument. */
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define _SDT_NOP nop 0
+#else
+#define _SDT_NOP nop
+#endif
+
+#define _SDT_NOTE_NAME "stapsdt"
+#define _SDT_NOTE_TYPE 3
+
+/* If the assembler supports the necessary feature, then we can play
+ nice with code in COMDAT sections, which comes up in C++ code.
+ Without that assembler support, some combinations of probe placements
+ in certain kinds of C++ code may produce link-time errors. */
+#include "sdt-config.h"
+#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT
+# define _SDT_ASM_AUTOGROUP "?"
+#else
+# define _SDT_ASM_AUTOGROUP ""
+#endif
+
+#define _SDT_DEF_MACROS \
+ _SDT_ASM_1(.altmacro) \
+ _SDT_ASM_1(.macro _SDT_SIGN x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.iflt \\x) \
+ _SDT_ASM_1(.ascii "-") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.ascii "\x") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE x) \
+ _SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8)) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_2(.ifc 8,\\x) \
+ _SDT_ASM_1(.ascii "f") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.ascii "@") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE x) \
+ _SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff))) \
+ _SDT_ASM_1(.endm)
+
+#define _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(.purgem _SDT_SIGN) \
+ _SDT_ASM_1(.purgem _SDT_SIZE_) \
+ _SDT_ASM_1(.purgem _SDT_SIZE) \
+ _SDT_ASM_1(.purgem _SDT_TYPE_) \
+ _SDT_ASM_1(.purgem _SDT_TYPE)
+
+#define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \
+ _SDT_DEF_MACROS \
+ _SDT_ASM_1(990: _SDT_NOP) \
+ _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
+ _SDT_ASM_1( .balign 4) \
+ _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \
+ _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \
+ _SDT_ASM_1(992: .balign 4) \
+ _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \
+ _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \
+ _SDT_SEMAPHORE(provider,name) \
+ _SDT_ASM_STRING(provider) \
+ _SDT_ASM_STRING(name) \
+ pack_args args \
+ _SDT_ASM_SUBSTR(\x00) \
+ _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(994: .balign 4) \
+ _SDT_ASM_1( .popsection)
+
+#define _SDT_ASM_BASE \
+ _SDT_ASM_1(.ifndef _.stapsdt.base) \
+ _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \
+ .stapsdt.base,comdat) \
+ _SDT_ASM_1( .weak _.stapsdt.base) \
+ _SDT_ASM_1( .hidden _.stapsdt.base) \
+ _SDT_ASM_1( _.stapsdt.base: .space 1) \
+ _SDT_ASM_2( .size _.stapsdt.base, 1) \
+ _SDT_ASM_1( .popsection) \
+ _SDT_ASM_1(.endif)
+
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_SEMAPHORE(p,n) \
+ _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore)
+#else
+#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0)
+#endif
+
+#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20)
+#define _SDT_ASM_TEMPLATE_0 /* no arguments */
+#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1)
+#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2)
+#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3)
+#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4)
+#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5)
+#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6)
+#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7)
+#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8)
+#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9)
+#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10)
+#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11)
+#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12)
+#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0)
+#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1)
+#define _SDT_ASM_OPERANDS_2(arg1, arg2) \
+ _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2)
+#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \
+ _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3)
+#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \
+ _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4)
+#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \
+ _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5)
+#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6)
+#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7)
+#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
+ _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \
+ _SDT_ARG(8, arg8)
+#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \
+ _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \
+ _SDT_ARG(9, arg9)
+#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \
+ _SDT_ARG(10, arg10)
+#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \
+ _SDT_ARG(11, arg11)
+#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \
+ _SDT_ARG(12, arg12)
+
+/* These macros can be used in C, C++, or assembly code.
+ In assembly code the arguments should use normal assembly operand syntax. */
+
+#define STAP_PROBE(provider, name) \
+ _SDT_PROBE(provider, name, 0, ())
+#define STAP_PROBE1(provider, name, arg1) \
+ _SDT_PROBE(provider, name, 1, (arg1))
+#define STAP_PROBE2(provider, name, arg1, arg2) \
+ _SDT_PROBE(provider, name, 2, (arg1, arg2))
+#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \
+ _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3))
+#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \
+ _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4))
+#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \
+ _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5))
+#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6))
+#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7))
+#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \
+ _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8))
+#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\
+ _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
+#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_PROBE(provider, name, 10, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10))
+#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_PROBE(provider, name, 11, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11))
+#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_PROBE(provider, name, 12, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12))
+
+/* This STAP_PROBEV macro can be used in variadic scenarios, where the
+ number of probe arguments is not known until compile time. Since
+ variadic macro support may vary with compiler options, you must
+ pre-#define SDT_USE_VARIADIC to enable this type of probe.
+
+ The trick to count __VA_ARGS__ was inspired by this post by
+ Laurent Deniau <laurent.deniau@cern.ch>:
+ http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5
+
+ Note that our _SDT_NARG is called with an extra 0 arg that's not
+ counted, so we don't have to worry about the behavior of macros
+ called without any arguments. */
+
+#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0)
+#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N
+#ifdef SDT_USE_VARIADIC
+#define _SDT_PROBE_N(provider, name, N, ...) \
+ _SDT_PROBE(provider, name, N, (__VA_ARGS__))
+#define STAP_PROBEV(provider, name, ...) \
+ _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__)
+#endif
+
+/* These macros are for use in asm statements. You must compile
+ with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro.
+
+ The STAP_PROBE_ASM macro generates a quoted string to be used in the
+ template portion of the asm statement, concatenated with strings that
+ contain the actual assembly code around the probe site.
+
+ For example:
+
+ asm ("before\n"
+ STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi))
+ "after");
+
+ emits the assembly code for "before\nafter", with a probe in between.
+ The probe arguments are the %eax register, and the value of the memory
+ word located 4 bytes past the address in the %esi register. Note that
+ because this is a simple asm, not a GNU C extended asm statement, these
+ % characters do not need to be doubled to generate literal %reg names.
+
+ In a GNU C extended asm statement, the probe arguments can be specified
+ using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired
+ macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments,
+ and appears in the input operand list of the asm statement. For example:
+
+ asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand
+ STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3))
+ "otherinsn %[namedarg]"
+ : "r" (outvar)
+ : "g" (some_value), [namedarg] "i" (1234),
+ STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234));
+
+ This is just like writing:
+
+ STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234));
+
+ but the probe site is right between "someinsn" and "otherinsn".
+
+ The probe arguments in STAP_PROBE_ASM can be given as assembly
+ operands instead, even inside a GNU C extended asm statement.
+ Note that these can use operand templates like %0 or %[name],
+ and likewise they must write %%reg for a literal operand of %reg. */
+
+#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__))
+#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__)
+#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__)
+#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__)
+#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__)
+
+#if __STDC_VERSION__ >= 199901L
+# define STAP_PROBE_ASM(provider, name, ...) \
+ _SDT_ASM_BODY_N(provider, name, __VA_ARGS__) \
+ _SDT_ASM_BASE
+# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__)
+#else
+# define STAP_PROBE_ASM(provider, name, args) \
+ _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args)) \
+ _SDT_ASM_BASE
+#endif
+#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_"
+
+
+/* DTrace compatible macro names. */
+#define DTRACE_PROBE(provider,probe) \
+ STAP_PROBE(provider,probe)
+#define DTRACE_PROBE1(provider,probe,parm1) \
+ STAP_PROBE1(provider,probe,parm1)
+#define DTRACE_PROBE2(provider,probe,parm1,parm2) \
+ STAP_PROBE2(provider,probe,parm1,parm2)
+#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \
+ STAP_PROBE3(provider,probe,parm1,parm2,parm3)
+#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \
+ STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4)
+#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \
+ STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)
+#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \
+ STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6)
+#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \
+ STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7)
+#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \
+ STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8)
+#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \
+ STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9)
+#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \
+ STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10)
+#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \
+ STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11)
+#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \
+ STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12)
+
+
+#endif /* sys/sdt.h */
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 6bf21e47882a..c0e7acd698ed 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -180,7 +180,7 @@ class FileExtractor(object):
@enum_name: name of the enum to parse
"""
start_marker = re.compile(f'enum {enum_name} {{\n')
- pattern = re.compile('^\s*(BPF_\w+),?$')
+ pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
end_marker = re.compile('^};')
parser = BlockParser(self.reader)
parser.search_block(start_marker)
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index d6a1be4d8020..0861ea60dcdd 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -6,7 +6,7 @@
#include <stdlib.h>
#include <sys/sysinfo.h>
-#include "bpf_rlimit.h"
+#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "testing_helpers.h"
@@ -44,13 +44,16 @@ int main(int argc, char **argv)
unsigned long long *percpu_value;
int cpu, nproc;
- nproc = get_nprocs_conf();
+ nproc = bpf_num_possible_cpus();
percpu_value = malloc(sizeof(*percpu_value) * nproc);
if (!percpu_value) {
printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
goto err;
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key),
sizeof(value), 0, NULL);
if (map_fd < 0) {
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index c299d3452695..7886265846a0 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -15,7 +15,6 @@
#include "cgroup_helpers.h"
#include "testing_helpers.h"
-#include "bpf_rlimit.h"
#define DEV_CGROUP_PROG "./dev_cgroup.o"
@@ -28,6 +27,9 @@ int main(int argc, char **argv)
int prog_fd, cgroup_fd;
__u32 prog_cnt;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
&obj, &prog_fd)) {
printf("Failed to load DEV_CGROUP program\n");
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index aa294612e0a7..c028d621c744 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -26,7 +26,6 @@
#include <bpf/bpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
struct tlpm_node {
struct tlpm_node *next;
@@ -409,16 +408,13 @@ static void test_lpm_ipaddr(void)
/* Test some lookups that should not match any entry */
inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
- assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
- assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
- assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT);
close(map_fd_ipv4);
close(map_fd_ipv6);
@@ -475,18 +471,15 @@ static void test_lpm_delete(void)
/* remove non-existent node */
key->prefixlen = 32;
inet_pton(AF_INET, "10.0.0.1", key->data);
- assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
key->prefixlen = 30; // unused prefix so far
inet_pton(AF_INET, "192.255.0.0", key->data);
- assert(bpf_map_delete_elem(map_fd, key) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
key->prefixlen = 16; // same prefix as the root node
inet_pton(AF_INET, "192.255.0.0", key->data);
- assert(bpf_map_delete_elem(map_fd, key) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
/* assert initial lookup */
key->prefixlen = 32;
@@ -531,8 +524,7 @@ static void test_lpm_delete(void)
key->prefixlen = 32;
inet_pton(AF_INET, "192.168.128.1", key->data);
- assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
close(map_fd);
}
@@ -553,8 +545,7 @@ static void test_lpm_get_next_key(void)
assert(map_fd >= 0);
/* empty tree. get_next_key should return ENOENT */
- assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT);
/* get and verify the first key, get the second one should fail. */
key_p->prefixlen = 16;
@@ -566,8 +557,7 @@ static void test_lpm_get_next_key(void)
assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
key_p->data[1] == 168);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* no exact matching key should get the first one in post order. */
key_p->prefixlen = 8;
@@ -591,8 +581,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total three) */
key_p->prefixlen = 24;
@@ -615,8 +604,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total four) */
key_p->prefixlen = 24;
@@ -644,8 +632,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total five) */
key_p->prefixlen = 28;
@@ -679,8 +666,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* no exact matching key should return the first one in post order */
key_p->prefixlen = 22;
@@ -791,6 +777,9 @@ int main(void)
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
test_lpm_basic();
test_lpm_order();
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 563bbe18c172..4d0650cfb5cd 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -18,7 +18,6 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "../../../include/linux/filter.h"
#define LOCAL_FREE_TARGET (128)
@@ -176,24 +175,20 @@ static void test_lru_sanity0(int map_type, int map_flags)
BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
- errno == EINVAL);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL);
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
@@ -201,8 +196,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and mark the ref bit to
* stop LRU from removing key=1
@@ -218,8 +212,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
/* key=2 has been removed from the LRU */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* lookup elem key=1 and delete it, then check it doesn't exist */
key = 1;
@@ -382,8 +375,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
end_key = 1 + batch_size;
value[0] = 4321;
for (key = 1; key < end_key; key++) {
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -563,8 +555,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value));
/* Cannot find the last key because it was removed by LRU */
- assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT);
}
/* Test map with only one element */
@@ -712,21 +703,18 @@ static void test_lru_sanity7(int map_type, int map_flags)
BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
@@ -734,8 +722,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and mark the ref bit to
* stop LRU from removing key=1
@@ -758,8 +745,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
/* key=2 has been removed from the LRU */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -806,21 +792,18 @@ static void test_lru_sanity8(int map_type, int map_flags)
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
@@ -830,8 +813,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and do _not_ mark ref bit.
* this will be evicted on next update.
@@ -854,8 +836,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
/* key=1 has been removed from the LRU */
key = 1;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -878,6 +859,9 @@ int main(int argc, char **argv)
assert(nr_cpus != -1);
printf("nr_cpus:%d\n\n", nr_cpus);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index edaffd43da83..6cd6ef9fc20b 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -184,7 +184,7 @@ def bpftool_prog_list(expected=None, ns=""):
def bpftool_map_list(expected=None, ns=""):
_, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
# Remove the base maps
- maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names]
+ maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names]
if expected is not None:
if len(maps) != expected:
fail(True, "%d BPF maps loaded, expected %d" %
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2ecb73a65206..c536d1d29d57 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -3,6 +3,7 @@
*/
#define _GNU_SOURCE
#include "test_progs.h"
+#include "testing_helpers.h"
#include "cgroup_helpers.h"
#include <argp.h>
#include <pthread.h>
@@ -50,19 +51,8 @@ struct prog_test_def {
int test_num;
void (*run_test)(void);
void (*run_serial_test)(void);
- bool force_log;
- int error_cnt;
- int skip_cnt;
- int sub_succ_cnt;
bool should_run;
- bool tested;
bool need_cgroup_cleanup;
-
- char *subtest_name;
- int subtest_num;
-
- /* store counts before subtest started */
- int old_error_cnt;
};
/* Override C runtime library's usleep() implementation to ensure nanosleep()
@@ -84,12 +74,13 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
int i;
for (i = 0; i < sel->blacklist.cnt; i++) {
- if (glob_match(name, sel->blacklist.strs[i]))
+ if (glob_match(name, sel->blacklist.tests[i].name) &&
+ !sel->blacklist.tests[i].subtest_cnt)
return false;
}
for (i = 0; i < sel->whitelist.cnt; i++) {
- if (glob_match(name, sel->whitelist.strs[i]))
+ if (glob_match(name, sel->whitelist.tests[i].name))
return true;
}
@@ -99,32 +90,69 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
return num < sel->num_set_len && sel->num_set[num];
}
-static void dump_test_log(const struct prog_test_def *test, bool failed)
+static bool should_run_subtest(struct test_selector *sel,
+ struct test_selector *subtest_sel,
+ int subtest_num,
+ const char *test_name,
+ const char *subtest_name)
{
- if (stdout == env.stdout)
- return;
+ int i, j;
- /* worker always holds log */
- if (env.worker_id != -1)
- return;
+ for (i = 0; i < sel->blacklist.cnt; i++) {
+ if (glob_match(test_name, sel->blacklist.tests[i].name)) {
+ if (!sel->blacklist.tests[i].subtest_cnt)
+ return false;
+
+ for (j = 0; j < sel->blacklist.tests[i].subtest_cnt; j++) {
+ if (glob_match(subtest_name,
+ sel->blacklist.tests[i].subtests[j]))
+ return false;
+ }
+ }
+ }
- fflush(stdout); /* exports env.log_buf & env.log_cnt */
+ for (i = 0; i < sel->whitelist.cnt; i++) {
+ if (glob_match(test_name, sel->whitelist.tests[i].name)) {
+ if (!sel->whitelist.tests[i].subtest_cnt)
+ return true;
- if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
- if (env.log_cnt) {
- env.log_buf[env.log_cnt] = '\0';
- fprintf(env.stdout, "%s", env.log_buf);
- if (env.log_buf[env.log_cnt - 1] != '\n')
- fprintf(env.stdout, "\n");
+ for (j = 0; j < sel->whitelist.tests[i].subtest_cnt; j++) {
+ if (glob_match(subtest_name,
+ sel->whitelist.tests[i].subtests[j]))
+ return true;
+ }
}
}
+
+ if (!sel->whitelist.cnt && !subtest_sel->num_set)
+ return true;
+
+ return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
}
-static void skip_account(void)
+static void dump_test_log(const struct prog_test_def *test,
+ const struct test_state *test_state,
+ bool force_failed)
{
- if (env.test->skip_cnt) {
- env.skip_cnt++;
- env.test->skip_cnt = 0;
+ bool failed = test_state->error_cnt > 0 || force_failed;
+
+ /* worker always holds log */
+ if (env.worker_id != -1)
+ return;
+
+ fflush(stdout); /* exports test_state->log_buf & test_state->log_cnt */
+
+ fprintf(env.stdout, "#%-3d %s:%s\n",
+ test->test_num, test->test_name,
+ failed ? "FAIL" : (test_state->skip_cnt ? "SKIP" : "OK"));
+
+ if (env.verbosity > VERBOSE_NONE || test_state->force_log || failed) {
+ if (test_state->log_cnt) {
+ test_state->log_buf[test_state->log_cnt] = '\0';
+ fprintf(env.stdout, "%s", test_state->log_buf);
+ if (test_state->log_buf[test_state->log_cnt - 1] != '\n')
+ fprintf(env.stdout, "\n");
+ }
}
}
@@ -135,7 +163,6 @@ static void stdio_restore(void);
*/
static void reset_affinity(void)
{
-
cpu_set_t cpuset;
int i, err;
@@ -178,68 +205,78 @@ static void restore_netns(void)
void test__end_subtest(void)
{
struct prog_test_def *test = env.test;
- int sub_error_cnt = test->error_cnt - test->old_error_cnt;
-
- dump_test_log(test, sub_error_cnt);
+ struct test_state *state = env.test_state;
+ int sub_error_cnt = state->error_cnt - state->old_error_cnt;
fprintf(stdout, "#%d/%d %s/%s:%s\n",
- test->test_num, test->subtest_num, test->test_name, test->subtest_name,
- sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+ test->test_num, state->subtest_num, test->test_name, state->subtest_name,
+ sub_error_cnt ? "FAIL" : (state->subtest_skip_cnt ? "SKIP" : "OK"));
- if (sub_error_cnt)
- test->error_cnt++;
- else if (test->skip_cnt == 0)
- test->sub_succ_cnt++;
- skip_account();
+ if (sub_error_cnt == 0) {
+ if (state->subtest_skip_cnt == 0) {
+ state->sub_succ_cnt++;
+ } else {
+ state->subtest_skip_cnt = 0;
+ state->skip_cnt++;
+ }
+ }
- free(test->subtest_name);
- test->subtest_name = NULL;
+ free(state->subtest_name);
+ state->subtest_name = NULL;
}
-bool test__start_subtest(const char *name)
+bool test__start_subtest(const char *subtest_name)
{
struct prog_test_def *test = env.test;
+ struct test_state *state = env.test_state;
- if (test->subtest_name)
+ if (state->subtest_name)
test__end_subtest();
- test->subtest_num++;
+ state->subtest_num++;
- if (!name || !name[0]) {
+ if (!subtest_name || !subtest_name[0]) {
fprintf(env.stderr,
"Subtest #%d didn't provide sub-test name!\n",
- test->subtest_num);
+ state->subtest_num);
return false;
}
- if (!should_run(&env.subtest_selector, test->subtest_num, name))
+ if (!should_run_subtest(&env.test_selector,
+ &env.subtest_selector,
+ state->subtest_num,
+ test->test_name,
+ subtest_name))
return false;
- test->subtest_name = strdup(name);
- if (!test->subtest_name) {
+ state->subtest_name = strdup(subtest_name);
+ if (!state->subtest_name) {
fprintf(env.stderr,
"Subtest #%d: failed to copy subtest name!\n",
- test->subtest_num);
+ state->subtest_num);
return false;
}
- env.test->old_error_cnt = env.test->error_cnt;
+ state->old_error_cnt = state->error_cnt;
return true;
}
void test__force_log(void)
{
- env.test->force_log = true;
+ env.test_state->force_log = true;
}
void test__skip(void)
{
- env.test->skip_cnt++;
+ if (env.test_state->subtest_name)
+ env.test_state->subtest_skip_cnt++;
+ else
+ env.test_state->skip_cnt++;
}
void test__fail(void)
{
- env.test->error_cnt++;
+ env.test_state->error_cnt++;
}
int test__join_cgroup(const char *path)
@@ -472,8 +509,11 @@ static struct prog_test_def prog_test_defs[] = {
#include <prog_tests/tests.h>
#undef DEFINE_TEST
};
+
static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];
+
const char *argp_program_version = "test_progs 0.1";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
static const char argp_program_doc[] = "BPF selftests test runner";
@@ -527,63 +567,29 @@ static int libbpf_print_fn(enum libbpf_print_level level,
return 0;
}
-static void free_str_set(const struct str_set *set)
+static void free_test_filter_set(const struct test_filter_set *set)
{
- int i;
+ int i, j;
if (!set)
return;
- for (i = 0; i < set->cnt; i++)
- free((void *)set->strs[i]);
- free(set->strs);
-}
-
-static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
-{
- char *input, *state = NULL, *next, **tmp, **strs = NULL;
- int i, cnt = 0;
-
- input = strdup(s);
- if (!input)
- return -ENOMEM;
-
- while ((next = strtok_r(state ? NULL : input, ",", &state))) {
- tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
- if (!tmp)
- goto err;
- strs = tmp;
-
- if (is_glob_pattern) {
- strs[cnt] = strdup(next);
- if (!strs[cnt])
- goto err;
- } else {
- strs[cnt] = malloc(strlen(next) + 2 + 1);
- if (!strs[cnt])
- goto err;
- sprintf(strs[cnt], "*%s*", next);
- }
+ for (i = 0; i < set->cnt; i++) {
+ free((void *)set->tests[i].name);
+ for (j = 0; j < set->tests[i].subtest_cnt; j++)
+ free((void *)set->tests[i].subtests[j]);
- cnt++;
+ free((void *)set->tests[i].subtests);
}
- tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
- if (!tmp)
- goto err;
- memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
- set->strs = (const char **)tmp;
- set->cnt += cnt;
+ free((void *)set->tests);
+}
- free(input);
- free(strs);
- return 0;
-err:
- for (i = 0; i < cnt; i++)
- free(strs[i]);
- free(strs);
- free(input);
- return -ENOMEM;
+static void free_test_selector(struct test_selector *test_selector)
+{
+ free_test_filter_set(&test_selector->blacklist);
+ free_test_filter_set(&test_selector->whitelist);
+ free(test_selector->num_set);
}
extern int extra_prog_load_log_flags;
@@ -615,33 +621,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
- char *subtest_str = strchr(arg, '/');
-
- if (subtest_str) {
- *subtest_str = '\0';
- if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.whitelist,
- key == ARG_TEST_NAME_GLOB_ALLOWLIST))
- return -ENOMEM;
- }
- if (parse_str_list(arg, &env->test_selector.whitelist,
- key == ARG_TEST_NAME_GLOB_ALLOWLIST))
+ if (parse_test_list(arg,
+ &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
break;
}
case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
- char *subtest_str = strchr(arg, '/');
-
- if (subtest_str) {
- *subtest_str = '\0';
- if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.blacklist,
- key == ARG_TEST_NAME_GLOB_DENYLIST))
- return -ENOMEM;
- }
- if (parse_str_list(arg, &env->test_selector.blacklist,
- key == ARG_TEST_NAME_GLOB_DENYLIST))
+ if (parse_test_list(arg,
+ &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
break;
}
@@ -706,7 +696,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return 0;
}
-static void stdio_hijack(void)
+static void stdio_hijack(char **log_buf, size_t *log_cnt)
{
#ifdef __GLIBC__
env.stdout = stdout;
@@ -720,7 +710,7 @@ static void stdio_hijack(void)
/* stdout and stderr -> buffer */
fflush(stdout);
- stdout = open_memstream(&env.log_buf, &env.log_cnt);
+ stdout = open_memstream(log_buf, log_cnt);
if (!stdout) {
stdout = env.stdout;
perror("open_memstream");
@@ -761,8 +751,10 @@ int cd_flavor_subdir(const char *exec_name)
const char *flavor = strrchr(exec_name, '/');
if (!flavor)
- return 0;
- flavor++;
+ flavor = exec_name;
+ else
+ flavor++;
+
flavor = strrchr(flavor, '-');
if (!flavor)
return 0;
@@ -821,7 +813,7 @@ void crash_handler(int signum)
sz = backtrace(bt, ARRAY_SIZE(bt));
if (env.test)
- dump_test_log(env.test, true);
+ dump_test_log(env.test, env.test_state, true);
if (env.stdout)
stdio_restore();
if (env.worker_id != -1)
@@ -843,17 +835,6 @@ static int current_test_idx;
static pthread_mutex_t current_test_lock;
static pthread_mutex_t stdout_output_lock;
-struct test_result {
- int error_cnt;
- int skip_cnt;
- int sub_succ_cnt;
-
- size_t log_cnt;
- char *log_buf;
-};
-
-static struct test_result test_results[ARRAY_SIZE(prog_test_defs)];
-
static inline const char *str_msg(const struct msg *msg, char *buf)
{
switch (msg->type) {
@@ -907,8 +888,12 @@ static int recv_message(int sock, struct msg *msg)
static void run_one_test(int test_num)
{
struct prog_test_def *test = &prog_test_defs[test_num];
+ struct test_state *state = &test_states[test_num];
env.test = test;
+ env.test_state = state;
+
+ stdio_hijack(&state->log_buf, &state->log_cnt);
if (test->run_test)
test->run_test();
@@ -916,17 +901,19 @@ static void run_one_test(int test_num)
test->run_serial_test();
/* ensure last sub-test is finalized properly */
- if (test->subtest_name)
+ if (state->subtest_name)
test__end_subtest();
- test->tested = true;
+ state->tested = true;
- dump_test_log(test, test->error_cnt);
+ dump_test_log(test, state, false);
reset_affinity();
restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
+
+ stdio_restore();
}
struct dispatch_data {
@@ -945,7 +932,7 @@ static void *dispatch_thread(void *ctx)
while (true) {
int test_to_run = -1;
struct prog_test_def *test;
- struct test_result *result;
+ struct test_state *state;
/* grab a test */
{
@@ -992,16 +979,15 @@ static void *dispatch_thread(void *ctx)
if (test_to_run != msg_test_done.test_done.test_num)
goto error;
- test->tested = true;
- result = &test_results[test_to_run];
-
- result->error_cnt = msg_test_done.test_done.error_cnt;
- result->skip_cnt = msg_test_done.test_done.skip_cnt;
- result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
+ state = &test_states[test_to_run];
+ state->tested = true;
+ state->error_cnt = msg_test_done.test_done.error_cnt;
+ state->skip_cnt = msg_test_done.test_done.skip_cnt;
+ state->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
/* collect all logs */
if (msg_test_done.test_done.have_log) {
- log_fp = open_memstream(&result->log_buf, &result->log_cnt);
+ log_fp = open_memstream(&state->log_buf, &state->log_cnt);
if (!log_fp)
goto error;
@@ -1020,25 +1006,11 @@ static void *dispatch_thread(void *ctx)
fclose(log_fp);
log_fp = NULL;
}
- /* output log */
- {
- pthread_mutex_lock(&stdout_output_lock);
-
- if (result->log_cnt) {
- result->log_buf[result->log_cnt] = '\0';
- fprintf(stdout, "%s", result->log_buf);
- if (result->log_buf[result->log_cnt - 1] != '\n')
- fprintf(stdout, "\n");
- }
-
- fprintf(stdout, "#%d %s:%s\n",
- test->test_num, test->test_name,
- result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
-
- pthread_mutex_unlock(&stdout_output_lock);
- }
-
} /* wait for test done */
+
+ pthread_mutex_lock(&stdout_output_lock);
+ dump_test_log(test, state, false);
+ pthread_mutex_unlock(&stdout_output_lock);
} /* while (true) */
error:
if (env.debug)
@@ -1060,38 +1032,50 @@ done:
return NULL;
}
-static void print_all_error_logs(void)
+static void calculate_summary_and_print_errors(struct test_env *env)
{
int i;
+ int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0;
+
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct test_state *state = &test_states[i];
- if (env.fail_cnt)
- fprintf(stdout, "\nAll error logs:\n");
+ if (!state->tested)
+ continue;
+
+ sub_succ_cnt += state->sub_succ_cnt;
+ skip_cnt += state->skip_cnt;
+
+ if (state->error_cnt)
+ fail_cnt++;
+ else
+ succ_cnt++;
+ }
+
+ if (fail_cnt)
+ printf("\nAll error logs:\n");
/* print error logs again */
for (i = 0; i < prog_test_cnt; i++) {
- struct prog_test_def *test;
- struct test_result *result;
-
- test = &prog_test_defs[i];
- result = &test_results[i];
+ struct prog_test_def *test = &prog_test_defs[i];
+ struct test_state *state = &test_states[i];
- if (!test->tested || !result->error_cnt)
+ if (!state->tested || !state->error_cnt)
continue;
- fprintf(stdout, "\n#%d %s:%s\n",
- test->test_num, test->test_name,
- result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
-
- if (result->log_cnt) {
- result->log_buf[result->log_cnt] = '\0';
- fprintf(stdout, "%s", result->log_buf);
- if (result->log_buf[result->log_cnt - 1] != '\n')
- fprintf(stdout, "\n");
- }
+ dump_test_log(test, state, true);
}
+
+ printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt);
+
+ env->succ_cnt = succ_cnt;
+ env->sub_succ_cnt = sub_succ_cnt;
+ env->fail_cnt = fail_cnt;
+ env->skip_cnt = skip_cnt;
}
-static int server_main(void)
+static void server_main(void)
{
pthread_t *dispatcher_threads;
struct dispatch_data *data;
@@ -1147,60 +1131,18 @@ static int server_main(void)
for (int i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
- struct test_result *result = &test_results[i];
if (!test->should_run || !test->run_serial_test)
continue;
- stdio_hijack();
-
run_one_test(i);
-
- stdio_restore();
- if (env.log_buf) {
- result->log_cnt = env.log_cnt;
- result->log_buf = strdup(env.log_buf);
-
- free(env.log_buf);
- env.log_buf = NULL;
- env.log_cnt = 0;
- }
- restore_netns();
-
- fprintf(stdout, "#%d %s:%s\n",
- test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
- result->error_cnt = test->error_cnt;
- result->skip_cnt = test->skip_cnt;
- result->sub_succ_cnt = test->sub_succ_cnt;
}
/* generate summary */
fflush(stderr);
fflush(stdout);
- for (i = 0; i < prog_test_cnt; i++) {
- struct prog_test_def *current_test;
- struct test_result *result;
-
- current_test = &prog_test_defs[i];
- result = &test_results[i];
-
- if (!current_test->tested)
- continue;
-
- env.succ_cnt += result->error_cnt ? 0 : 1;
- env.skip_cnt += result->skip_cnt;
- if (result->error_cnt)
- env.fail_cnt++;
- env.sub_succ_cnt += result->sub_succ_cnt;
- }
-
- print_all_error_logs();
-
- fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
- env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+ calculate_summary_and_print_errors(&env);
/* reap all workers */
for (i = 0; i < env.workers; i++) {
@@ -1210,8 +1152,6 @@ static int server_main(void)
if (pid != env.worker_pids[i])
perror("Unable to reap worker");
}
-
- return 0;
}
static int worker_main(int sock)
@@ -1232,35 +1172,29 @@ static int worker_main(int sock)
env.worker_id);
goto out;
case MSG_DO_TEST: {
- int test_to_run;
- struct prog_test_def *test;
+ int test_to_run = msg.do_test.test_num;
+ struct prog_test_def *test = &prog_test_defs[test_to_run];
+ struct test_state *state = &test_states[test_to_run];
struct msg msg_done;
- test_to_run = msg.do_test.test_num;
- test = &prog_test_defs[test_to_run];
-
if (env.debug)
fprintf(stderr, "[%d]: #%d:%s running.\n",
env.worker_id,
test_to_run + 1,
test->test_name);
- stdio_hijack();
-
run_one_test(test_to_run);
- stdio_restore();
-
memset(&msg_done, 0, sizeof(msg_done));
msg_done.type = MSG_TEST_DONE;
msg_done.test_done.test_num = test_to_run;
- msg_done.test_done.error_cnt = test->error_cnt;
- msg_done.test_done.skip_cnt = test->skip_cnt;
- msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt;
+ msg_done.test_done.error_cnt = state->error_cnt;
+ msg_done.test_done.skip_cnt = state->skip_cnt;
+ msg_done.test_done.sub_succ_cnt = state->sub_succ_cnt;
msg_done.test_done.have_log = false;
- if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) {
- if (env.log_cnt)
+ if (env.verbosity > VERBOSE_NONE || state->force_log || state->error_cnt) {
+ if (state->log_cnt)
msg_done.test_done.have_log = true;
}
if (send_message(sock, &msg_done) < 0) {
@@ -1273,8 +1207,8 @@ static int worker_main(int sock)
char *src;
size_t slen;
- src = env.log_buf;
- slen = env.log_cnt;
+ src = state->log_buf;
+ slen = state->log_cnt;
while (slen) {
struct msg msg_log;
char *dest;
@@ -1294,10 +1228,10 @@ static int worker_main(int sock)
assert(send_message(sock, &msg_log) >= 0);
}
}
- if (env.log_buf) {
- free(env.log_buf);
- env.log_buf = NULL;
- env.log_cnt = 0;
+ if (state->log_buf) {
+ free(state->log_buf);
+ state->log_buf = NULL;
+ state->log_cnt = 0;
}
if (env.debug)
fprintf(stderr, "[%d]: #%d:%s done.\n",
@@ -1428,7 +1362,6 @@ int main(int argc, char **argv)
for (i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
- struct test_result *result;
if (!test->should_run)
continue;
@@ -1444,34 +1377,7 @@ int main(int argc, char **argv)
continue;
}
- stdio_hijack();
-
run_one_test(i);
-
- stdio_restore();
-
- fprintf(env.stdout, "#%d %s:%s\n",
- test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
- result = &test_results[i];
- result->error_cnt = test->error_cnt;
- if (env.log_buf) {
- result->log_buf = strdup(env.log_buf);
- result->log_cnt = env.log_cnt;
-
- free(env.log_buf);
- env.log_buf = NULL;
- env.log_cnt = 0;
- }
-
- if (test->error_cnt)
- env.fail_cnt++;
- else
- env.succ_cnt++;
-
- skip_account();
- env.sub_succ_cnt += test->sub_succ_cnt;
}
if (env.get_test_cnt) {
@@ -1482,21 +1388,14 @@ int main(int argc, char **argv)
if (env.list_test_names)
goto out;
- print_all_error_logs();
-
- fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
- env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+ calculate_summary_and_print_errors(&env);
close(env.saved_netns_fd);
out:
if (!env.list_test_names && env.has_testmod)
unload_bpf_testmod();
- free_str_set(&env.test_selector.blacklist);
- free_str_set(&env.test_selector.whitelist);
- free(env.test_selector.num_set);
- free_str_set(&env.subtest_selector.blacklist);
- free_str_set(&env.subtest_selector.whitelist);
- free(env.subtest_selector.num_set);
+
+ free_test_selector(&env.test_selector);
if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
return EXIT_NO_TEST;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 93c1ff705533..d3fee3b98888 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -25,6 +25,7 @@ typedef __u16 __sum16;
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/param.h>
#include <fcntl.h>
#include <pthread.h>
#include <linux/bpf.h>
@@ -37,7 +38,6 @@ typedef __u16 __sum16;
#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
#include "testing_helpers.h"
-#include "flow_dissector_load.h"
enum verbosity {
VERBOSE_NONE,
@@ -46,18 +46,43 @@ enum verbosity {
VERBOSE_SUPER,
};
-struct str_set {
- const char **strs;
+struct test_filter {
+ char *name;
+ char **subtests;
+ int subtest_cnt;
+};
+
+struct test_filter_set {
+ struct test_filter *tests;
int cnt;
};
struct test_selector {
- struct str_set whitelist;
- struct str_set blacklist;
+ struct test_filter_set whitelist;
+ struct test_filter_set blacklist;
bool *num_set;
int num_set_len;
};
+struct test_state {
+ bool tested;
+ bool force_log;
+
+ int error_cnt;
+ int skip_cnt;
+ int subtest_skip_cnt;
+ int sub_succ_cnt;
+
+ char *subtest_name;
+ int subtest_num;
+
+ /* store counts before subtest started */
+ int old_error_cnt;
+
+ size_t log_cnt;
+ char *log_buf;
+};
+
struct test_env {
struct test_selector test_selector;
struct test_selector subtest_selector;
@@ -70,12 +95,11 @@ struct test_env {
bool get_test_cnt;
bool list_test_names;
- struct prog_test_def *test; /* current running tests */
+ struct prog_test_def *test; /* current running test */
+ struct test_state *test_state; /* current running test result */
FILE *stdout;
FILE *stderr;
- char *log_buf;
- size_t log_cnt;
int nr_cpus;
int succ_cnt; /* successful tests */
@@ -120,11 +144,12 @@ struct msg {
extern struct test_env env;
-extern void test__force_log();
-extern bool test__start_subtest(const char *name);
-extern void test__skip(void);
-extern void test__fail(void);
-extern int test__join_cgroup(const char *path);
+void test__force_log(void);
+bool test__start_subtest(const char *name);
+void test__end_subtest(void);
+void test__skip(void);
+void test__fail(void);
+int test__join_cgroup(const char *path);
#define PRINT_FAIL(format...) \
({ \
@@ -267,6 +292,17 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_HAS_SUBSTR(str, substr, name) ({ \
+ static int duration = 0; \
+ const char *___str = str; \
+ const char *___substr = substr; \
+ bool ___ok = strstr(___str, ___substr) != NULL; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: '%s' is not a substring of '%s'\n", \
+ (name), ___substr, ___str); \
+ ___ok; \
+})
+
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \
@@ -332,6 +368,8 @@ int trigger_module_test_write(int write_sz);
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
#elif defined(__s390x__)
#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
+#elif defined(__aarch64__)
+#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
#else
#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
#endif
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 4a64306728ab..3256de30f563 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -15,7 +15,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define CGROUP_PATH "/skb_cgroup_test"
@@ -160,6 +159,9 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
cgfd = cgroup_setup_and_join(CGROUP_PATH);
if (cgfd < 0)
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index fe10f8134278..810c3740b2cc 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -14,7 +14,6 @@
#include "cgroup_helpers.h"
#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#define CG_PATH "/foo"
@@ -493,7 +492,7 @@ static int run_test_case(int cgfd, const struct sock_test *test)
goto err;
}
- if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+ if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) {
if (test->result == ATTACH_REJECT)
goto out;
else
@@ -541,6 +540,9 @@ int main(int argc, char **argv)
if (cgfd < 0)
goto err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index f3d5d7ac6505..458564fcfc82 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -19,7 +19,6 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#ifndef ENOTSUPP
@@ -1418,6 +1417,9 @@ int main(int argc, char **argv)
if (cgfd < 0)
goto err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index dfb4f5c0fcb9..0fbaccdc8861 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -18,7 +18,6 @@
#include <sched.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <sys/sendfile.h>
@@ -37,7 +36,6 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
int running;
@@ -2017,6 +2015,9 @@ int main(int argc, char **argv)
cg_created = 1;
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
if (test == SELFTESTS) {
err = test_selftest(cg_fd, &options);
goto out;
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 4f6cf833b522..57620e7c9048 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -14,7 +14,6 @@
#include <bpf/libbpf.h>
#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "testing_helpers.h"
@@ -1561,7 +1560,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
goto err;
}
- if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+ if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) {
if (test->result == ATTACH_REJECT)
goto out;
else
@@ -1618,6 +1617,9 @@ int main(int argc, char **argv)
if (cgfd < 0)
goto err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 0851c42ee31c..5546b05a0486 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -20,7 +20,6 @@
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
#include "testing_helpers.h"
static struct bpf_insn prog[BPF_MAXINSNS];
@@ -189,6 +188,9 @@ int main(void)
uint32_t tests = 0;
int i, fd_map;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
sizeof(int), 1, &opts);
assert(fd_map > 0);
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index e7775d3bbe08..5c8ef062f760 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -15,7 +15,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
@@ -235,6 +234,9 @@ int main(int argc, char **argv)
exit(1);
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
if (results < 0) {
log_err("Can't get map");
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 4c5114765b23..8284db8b0f13 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -19,7 +19,6 @@
#include <linux/perf_event.h>
#include <linux/err.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index a2cd236c32eb..372579c9f45e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -53,7 +53,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 22
+#define MAX_NR_MAPS 23
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -101,6 +101,7 @@ struct bpf_test {
int fixup_map_reuseport_array[MAX_FIXUPS];
int fixup_map_ringbuf[MAX_FIXUPS];
int fixup_map_timer[MAX_FIXUPS];
+ int fixup_map_kptr[MAX_FIXUPS];
struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
* Can be a tab-separated sequence of expected strings. An empty string
@@ -621,8 +622,15 @@ static int create_cgroup_storage(bool percpu)
* struct timer {
* struct bpf_timer t;
* };
+ * struct btf_ptr {
+ * struct prog_test_ref_kfunc __kptr *ptr;
+ * struct prog_test_ref_kfunc __kptr_ref *ptr;
+ * struct prog_test_member __kptr_ref *ptr;
+ * }
*/
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
+ "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref"
+ "\0prog_test_member";
static __u32 btf_raw_types[] = {
/* int */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -638,6 +646,22 @@ static __u32 btf_raw_types[] = {
/* struct timer */ /* [5] */
BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
+ /* struct prog_test_ref_kfunc */ /* [6] */
+ BTF_STRUCT_ENC(51, 0, 0),
+ BTF_STRUCT_ENC(89, 0, 0), /* [7] */
+ /* type tag "kptr" */
+ BTF_TYPE_TAG_ENC(75, 6), /* [8] */
+ /* type tag "kptr_ref" */
+ BTF_TYPE_TAG_ENC(80, 6), /* [9] */
+ BTF_TYPE_TAG_ENC(80, 7), /* [10] */
+ BTF_PTR_ENC(8), /* [11] */
+ BTF_PTR_ENC(9), /* [12] */
+ BTF_PTR_ENC(10), /* [13] */
+ /* struct btf_ptr */ /* [14] */
+ BTF_STRUCT_ENC(43, 3, 24),
+ BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */
+ BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */
+ BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
};
static int load_btf(void)
@@ -727,6 +751,25 @@ static int create_map_timer(void)
return fd;
}
+static int create_map_kptr(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 14,
+ );
+ int fd, btf_fd;
+
+ btf_fd = load_btf();
+ if (btf_fd < 0)
+ return -1;
+
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts);
+ if (fd < 0)
+ printf("Failed to create map with btf_id pointer\n");
+ return fd;
+}
+
static char bpf_vlog[UINT_MAX >> 8];
static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
@@ -754,6 +797,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
int *fixup_map_ringbuf = test->fixup_map_ringbuf;
int *fixup_map_timer = test->fixup_map_timer;
+ int *fixup_map_kptr = test->fixup_map_kptr;
struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
if (test->fill_helper) {
@@ -947,6 +991,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_map_timer++;
} while (*fixup_map_timer);
}
+ if (*fixup_map_kptr) {
+ map_fds[22] = create_map_kptr();
+ do {
+ prog[*fixup_map_kptr].imm = map_fds[22];
+ fixup_map_kptr++;
+ } while (*fixup_map_kptr);
+ }
/* Patch in kfunc BTF IDs */
if (fixup_kfunc_btf_id->kfunc) {
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index 8d6918c3b4a2..70feda97cee5 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -11,8 +11,6 @@
#include <bpf/bpf.h>
-#include "bpf_rlimit.h"
-
#define LOG_SIZE (1 << 20)
#define err(str...) printf("ERROR: " str)
@@ -141,6 +139,9 @@ int main(int argc, char **argv)
memset(log, 1, LOG_SIZE);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
/* Test incorrect attr */
printf("Test log_level 0...\n");
test_log_bad(log, LOG_SIZE, 0);
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 795b6798ccee..9695318e8132 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -6,6 +6,7 @@
#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "test_progs.h"
#include "testing_helpers.h"
int parse_num_list(const char *s, bool **num_set, int *num_set_len)
@@ -60,7 +61,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
set[i] = true;
}
- if (!set)
+ if (!set || parsing_end)
return -EINVAL;
*num_set = set;
@@ -69,6 +70,94 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
return 0;
}
+int parse_test_list(const char *s,
+ struct test_filter_set *set,
+ bool is_glob_pattern)
+{
+ char *input, *state = NULL, *next;
+ struct test_filter *tmp, *tests = NULL;
+ int i, j, cnt = 0;
+
+ input = strdup(s);
+ if (!input)
+ return -ENOMEM;
+
+ while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+ char *subtest_str = strchr(next, '/');
+ char *pattern = NULL;
+ int glob_chars = 0;
+
+ tmp = realloc(tests, sizeof(*tests) * (cnt + 1));
+ if (!tmp)
+ goto err;
+ tests = tmp;
+
+ tests[cnt].subtest_cnt = 0;
+ tests[cnt].subtests = NULL;
+
+ if (is_glob_pattern) {
+ pattern = "%s";
+ } else {
+ pattern = "*%s*";
+ glob_chars = 2;
+ }
+
+ if (subtest_str) {
+ char **tmp_subtests = NULL;
+ int subtest_cnt = tests[cnt].subtest_cnt;
+
+ *subtest_str = '\0';
+ subtest_str += 1;
+ tmp_subtests = realloc(tests[cnt].subtests,
+ sizeof(*tmp_subtests) *
+ (subtest_cnt + 1));
+ if (!tmp_subtests)
+ goto err;
+ tests[cnt].subtests = tmp_subtests;
+
+ tests[cnt].subtests[subtest_cnt] =
+ malloc(strlen(subtest_str) + glob_chars + 1);
+ if (!tests[cnt].subtests[subtest_cnt])
+ goto err;
+ sprintf(tests[cnt].subtests[subtest_cnt],
+ pattern,
+ subtest_str);
+
+ tests[cnt].subtest_cnt++;
+ }
+
+ tests[cnt].name = malloc(strlen(next) + glob_chars + 1);
+ if (!tests[cnt].name)
+ goto err;
+ sprintf(tests[cnt].name, pattern, next);
+
+ cnt++;
+ }
+
+ tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt));
+ if (!tmp)
+ goto err;
+
+ memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt);
+ set->tests = tmp;
+ set->cnt += cnt;
+
+ free(tests);
+ free(input);
+ return 0;
+
+err:
+ for (i = 0; i < cnt; i++) {
+ for (j = 0; j < tests[i].subtest_cnt; j++)
+ free(tests[i].subtests[j]);
+
+ free(tests[i].name);
+ }
+ free(tests);
+ free(input);
+ return -ENOMEM;
+}
+
__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
{
__u32 info_len = sizeof(*info);
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index f46ebc476ee8..6ec00bf79cb5 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -12,3 +12,11 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf,
size_t log_buf_sz);
+
+/*
+ * below function is exported for testing in prog_test test
+ */
+struct test_filter_set;
+int parse_test_list(const char *s,
+ struct test_filter_set *test_set,
+ bool is_glob_pattern);
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 3d6217e3aff7..9c4be2cdb21a 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -25,15 +25,12 @@ static int ksym_cmp(const void *p1, const void *p2)
int load_kallsyms(void)
{
- FILE *f = fopen("/proc/kallsyms", "r");
+ FILE *f;
char func[256], buf[256];
char symbol;
void *addr;
int i = 0;
- if (!f)
- return -ENOENT;
-
/*
* This is called/used from multiplace places,
* load symbols just once.
@@ -41,6 +38,10 @@ int load_kallsyms(void)
if (sym_cnt)
return 0;
+ f = fopen("/proc/kallsyms", "r");
+ if (!f)
+ return -ENOENT;
+
while (fgets(buf, sizeof(buf), f)) {
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
break;
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index db781052758d..e92644d0fa75 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -1,32 +1,85 @@
+#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
+#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
+#include <signal.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
#define BUF_SIZE 256
+/* defined in urandom_read_aux.c */
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+/* these are coming from urandom_read_lib{1,2}.c */
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+
+unsigned short urand_read_with_sema_semaphore SEC(".probes");
+
static __attribute__((noinline))
void urandom_read(int fd, int count)
{
- char buf[BUF_SIZE];
- int i;
+ char buf[BUF_SIZE];
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ read(fd, buf, BUF_SIZE);
+
+ /* trigger USDTs defined in executable itself */
+ urand_read_without_sema(i, count, BUF_SIZE);
+ STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE);
- for (i = 0; i < count; ++i)
- read(fd, buf, BUF_SIZE);
+ /* trigger USDTs defined in shared lib */
+ urandlib_read_without_sema(i, count, BUF_SIZE);
+ urandlib_read_with_sema(i, count, BUF_SIZE);
+ }
+}
+
+static volatile bool parent_ready;
+
+static void handle_sigpipe(int sig)
+{
+ parent_ready = true;
}
int main(int argc, char *argv[])
{
int fd = open("/dev/urandom", O_RDONLY);
int count = 4;
+ bool report_pid = false;
if (fd < 0)
return 1;
- if (argc == 2)
+ if (argc >= 2)
count = atoi(argv[1]);
+ if (argc >= 3) {
+ report_pid = true;
+ /* install SIGPIPE handler to catch when parent closes their
+ * end of the pipe (on the other side of our stdout)
+ */
+ signal(SIGPIPE, handle_sigpipe);
+ }
+
+ /* report PID and wait for parent process to send us "signal" by
+ * closing stdout
+ */
+ if (report_pid) {
+ while (!parent_ready) {
+ fprintf(stdout, "%d\n", getpid());
+ fflush(stdout);
+ }
+ /* at this point stdout is closed, parent process knows our
+ * PID and is ready to trace us
+ */
+ }
urandom_read(fd, count);
diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c
new file mode 100644
index 000000000000..6132edcfea74
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_aux.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ /* semaphore-less USDT */
+ STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
new file mode 100644
index 000000000000..86186e24b740
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
+
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c
new file mode 100644
index 000000000000..9d401ad9838f
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib2.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 2e03decb11b6..743ed34c1238 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -139,6 +139,26 @@
},
},
{
+ "calls: invalid kfunc call: don't match first member type when passed to release kfunc",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_memb_acquire", 1 },
+ { "bpf_kfunc_call_memb1_release", 5 },
+ },
+},
+{
"calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset",
.insns = {
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
new file mode 100644
index 000000000000..9113834640e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -0,0 +1,469 @@
+/* Common tests */
+{
+ "map_kptr: BPF_ST imm != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_ST imm must be 0 when storing to kptr at off=0",
+},
+{
+ "map_kptr: size != bpf_size_to_bytes(BPF_DW)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access size must be BPF_DW",
+},
+{
+ "map_kptr: map_value non-const var_off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access cannot have variable offset",
+},
+{
+ "map_kptr: bpf_kptr_xchg non-const var_off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 doesn't have constant offset. kptr has to be at the constant offset",
+},
+{
+ "map_kptr: unaligned boundary load/store",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access misaligned expected=0 off=7",
+},
+{
+ "map_kptr: reject var_off != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+/* Tests for unreferened PTR_TO_BTF_ID */
+{
+ "map_kptr: unref: reject btf_struct_ids_match == false",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test",
+},
+{
+ "map_kptr: unref: loaded pointer marked as untrusted",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'",
+},
+{
+ "map_kptr: unref: correct in kernel type size",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 24),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "access beyond struct prog_test_ref_kfunc at off 24 size 8",
+},
+{
+ "map_kptr: unref: inherit PTR_UNTRUSTED on struct walk",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_",
+},
+{
+ "map_kptr: unref: no reference state created",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = ACCEPT,
+},
+{
+ "map_kptr: unref: bpf_kptr_xchg rejected",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "off=0 kptr isn't referenced kptr",
+},
+{
+ "map_kptr: unref: bpf_kfunc_call_test_kptr_get rejected",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "arg#0 no referenced kptr at map value offset=0",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_kptr_get", 13 },
+ }
+},
+/* Tests for referenced PTR_TO_BTF_ID */
+{
+ "map_kptr: ref: loaded pointer marked as untrusted",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_",
+},
+{
+ "map_kptr: ref: reject off != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member",
+},
+{
+ "map_kptr: ref: reference state created and released on xchg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "Unreleased reference id=5 alloc_insn=20",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 15 },
+ }
+},
+{
+ "map_kptr: ref: reject STX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "store to referenced kptr disallowed",
+},
+{
+ "map_kptr: ref: reject ST",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "store to referenced kptr disallowed",
+},
+{
+ "map_kptr: reject helper access to kptr",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr cannot be accessed indirectly by helper",
+},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index fbd682520e47..57a83d763ec1 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -796,7 +796,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "reference has not been acquired before",
+ .errstr = "R1 must be referenced when passed to release function",
},
{
/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 86b24cad27a7..d11d0b28be41 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -417,7 +417,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "reference has not been acquired before",
+ .errstr = "R1 must be referenced when passed to release function",
},
{
"bpf_sk_release(bpf_sk_fullsock(skb->sk))",
@@ -436,7 +436,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "reference has not been acquired before",
+ .errstr = "R1 must be referenced when passed to release function",
},
{
"bpf_sk_release(bpf_tcp_sock(skb->sk))",
@@ -455,7 +455,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "reference has not been acquired before",
+ .errstr = "R1 must be referenced when passed to release function",
},
{
"sk_storage_get(map, skb->sk, NULL, 0): value == NULL",
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index aaedbf4955c3..c03b3a75991f 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -10,7 +10,6 @@
#include <net/if.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/socket.h>
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index c567856fd1bc..5b6f977870f8 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -12,7 +12,6 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
#include <net/if.h>
#include <sys/types.h>
#include <sys/socket.h>
@@ -89,7 +88,6 @@ int main(int argc, char **argv)
{
__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
struct addrinfo *a, hints = { .ai_family = AF_INET };
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
__u16 count = XDPING_DEFAULT_COUNT;
struct pinginfo pinginfo = { 0 };
const char *optstr = "c:I:NsS";
@@ -167,10 +165,8 @@ int main(int argc, char **argv)
freeaddrinfo(a);
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 5f8296d29e77..cfcb031323c5 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -90,7 +90,6 @@
#include <string.h>
#include <stddef.h>
#include <sys/mman.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <sys/queue.h>
#include <time.h>
@@ -1448,14 +1447,13 @@ static void ifobject_delete(struct ifobject *ifobj)
int main(int argc, char **argv)
{
- struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
struct pkt_stream *pkt_stream_default;
struct ifobject *ifobj_tx, *ifobj_rx;
struct test_spec test;
u32 i, j;
- if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
- exit_with_error(errno);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
ifobj_tx = ifobject_create();
if (!ifobj_tx)
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
new file mode 120000
index 000000000000..f5eb940c4c7c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_locked_port.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
new file mode 120000
index 000000000000..76492da525f7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_mdb.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
new file mode 120000
index 000000000000..81a7e0df0474
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_mld.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
new file mode 120000
index 000000000000..9831ed74376a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_aware.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
new file mode 120000
index 000000000000..7f3c3f0bf719
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_mcast.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
new file mode 120000
index 000000000000..bf1a57e6bde1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_unaware.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config
new file mode 100644
index 000000000000..7adc1396fae0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config
@@ -0,0 +1,2 @@
+NETIF_CREATE=no
+STABLE_MAC_ADDRS=yes
diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh
new file mode 120000
index 000000000000..39c96828c5ef
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/lib.sh
@@ -0,0 +1 @@
+../../../net/forwarding/lib.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
new file mode 120000
index 000000000000..c08166f84501
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -0,0 +1 @@
+../../../net/forwarding/local_termination.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
new file mode 120000
index 000000000000..b9757466bc97
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -0,0 +1 @@
+../../../net/forwarding/no_forwarding.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
new file mode 100755
index 000000000000..53a65f416770
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -0,0 +1,341 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In addition to the common variables, user might use:
+# LC_SLOT - If not set, all probed line cards are going to be tested,
+# with an exception of the "activation_16x100G_test".
+# It set, only the selected line card is going to be used
+# for tests, including "activation_16x100G_test".
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ unprovision_test
+ provision_test
+ activation_16x100G_test
+"
+
+NUM_NETIFS=0
+
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+until_lc_state_is()
+{
+ local state=$1; shift
+ local current=$("$@")
+
+ echo "$current"
+ [ "$current" == "$state" ]
+}
+
+until_lc_state_is_not()
+{
+ ! until_lc_state_is "$@"
+}
+
+lc_state_get()
+{
+ local lc=$1
+
+ devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state"
+}
+
+lc_wait_until_state_changes()
+{
+ local lc=$1
+ local state=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc"
+}
+
+lc_wait_until_state_becomes()
+{
+ local lc=$1
+ local state=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc"
+}
+
+until_lc_port_count_is()
+{
+ local port_count=$1; shift
+ local current=$("$@")
+
+ echo "$current"
+ [ $current == $port_count ]
+}
+
+lc_port_count_get()
+{
+ local lc=$1
+
+ devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l
+}
+
+lc_wait_until_port_count_is()
+{
+ local lc=$1
+ local port_count=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc"
+}
+
+PROV_UNPROV_TIMEOUT=8000 # ms
+POST_PROV_ACT_TIMEOUT=2000 # ms
+PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms
+
+unprovision_one()
+{
+ local lc=$1
+ local state
+
+ state=$(lc_state_get $lc)
+ check_err $? "Failed to get state of linecard $lc"
+ if [[ "$state" == "unprovisioned" ]]; then
+ return
+ fi
+
+ log_info "Unprovisioning linecard $lc"
+
+ devlink lc set $DEVLINK_DEV lc $lc notype
+ check_err $? "Failed to trigger linecard $lc unprovisioning"
+
+ state=$(lc_wait_until_state_changes $lc "unprovisioning" \
+ $PROV_UNPROV_TIMEOUT)
+ check_err $? "Failed to unprovision linecard $lc (timeout)"
+
+ [ "$state" == "unprovisioned" ]
+ check_err $? "Failed to unprovision linecard $lc (state=$state)"
+}
+
+provision_one()
+{
+ local lc=$1
+ local type=$2
+ local state
+
+ log_info "Provisioning linecard $lc"
+
+ devlink lc set $DEVLINK_DEV lc $lc type $type
+ check_err $? "Failed trigger linecard $lc provisioning"
+
+ state=$(lc_wait_until_state_changes $lc "provisioning" \
+ $PROV_UNPROV_TIMEOUT)
+ check_err $? "Failed to provision linecard $lc (timeout)"
+
+ [ "$state" == "provisioned" ] || [ "$state" == "active" ]
+ check_err $? "Failed to provision linecard $lc (state=$state)"
+
+ provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type")
+ [ "$provisioned_type" == "$type" ]
+ check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")"
+
+ # Wait for possible activation to make sure the state
+ # won't change after return from this function.
+ state=$(lc_wait_until_state_becomes $lc "active" \
+ $POST_PROV_ACT_TIMEOUT)
+}
+
+unprovision_test()
+{
+ RET=0
+ local lc
+
+ lc=$LC_SLOT
+ unprovision_one $lc
+ log_test "Unprovision"
+}
+
+LC_16X100G_TYPE="16x100G"
+LC_16X100G_PORT_COUNT=16
+LC_16X100G_DEVICE_COUNT=4
+
+supported_types_check()
+{
+ local lc=$1
+ local supported_types_count
+ local type_index
+ local lc_16x100_found=false
+
+ supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].supported_types | length")
+ [ $supported_types_count != 0 ]
+ check_err $? "No supported types found for linecard $lc"
+ for (( type_index=0; type_index<$supported_types_count; type_index++ ))
+ do
+ type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].supported_types[$type_index]")
+ if [[ "$type" == "$LC_16X100G_TYPE" ]]; then
+ lc_16x100_found=true
+ break
+ fi
+ done
+ [ $lc_16x100_found = true ]
+ check_err $? "16X100G not found between supported types of linecard $lc"
+}
+
+lc_info_check()
+{
+ local lc=$1
+ local fixed_hw_revision
+ local running_ini_version
+
+ fixed_hw_revision=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
+ jq -e -r '.[][][].versions.fixed."hw.revision"')
+ check_err $? "Failed to get linecard $lc fixed.hw.revision"
+ log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\""
+ running_ini_version=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
+ jq -e -r '.[][][].versions.running."ini.version"')
+ check_err $? "Failed to get linecard $lc running.ini.version"
+ log_info "Linecard $lc running.ini.version: \"$running_ini_version\""
+}
+
+lc_devices_check()
+{
+ local lc=$1
+ local expected_device_count=$2
+ local device_count
+ local device
+
+ device_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].devices |length")
+ check_err $? "Failed to get linecard $lc device count"
+ [ $device_count != 0 ]
+ check_err $? "No device found on linecard $lc"
+ [ $device_count == $expected_device_count ]
+ check_err $? "Unexpected device count on linecard $lc (got $expected_device_count, expected $device_count)"
+ for (( device=0; device<device_count; device++ ))
+ do
+ log_info "Linecard $lc device $device"
+ done
+}
+
+ports_check()
+{
+ local lc=$1
+ local expected_port_count=$2
+ local port_count
+
+ port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \
+ $PROV_PORTS_INSTANTIATION_TIMEOUT)
+ [ $port_count != 0 ]
+ check_err $? "No port associated with linecard $lc"
+ [ $port_count == $expected_port_count ]
+ check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)"
+}
+
+provision_test()
+{
+ RET=0
+ local lc
+ local type
+ local state
+
+ lc=$LC_SLOT
+ supported_types_check $lc
+ state=$(lc_state_get $lc)
+ check_err $? "Failed to get state of linecard $lc"
+ if [[ "$state" != "unprovisioned" ]]; then
+ unprovision_one $lc
+ fi
+ provision_one $lc $LC_16X100G_TYPE
+ lc_devices_check $lc $LC_16X100G_DEVICE_COUNT
+ lc_info_check $lc
+ ports_check $lc $LC_16X100G_PORT_COUNT
+ log_test "Provision"
+}
+
+ACTIVATION_TIMEOUT=20000 # ms
+
+interface_check()
+{
+ ip link set $h1 up
+ ip link set $h2 up
+ ifaces_upped=true
+ setup_wait
+}
+
+lc_devices_info_check()
+{
+ local lc=$1
+ local expected_device_count=$2
+ local device_count
+ local device
+ local running_device_fw
+
+ device_count=$(devlink lc info $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].devices |length")
+ check_err $? "Failed to get linecard $lc device count"
+ for (( device=0; device<device_count; device++ ))
+ do
+ running_device_fw=$(devlink lc -v info $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].devices[$device].versions.running.fw")
+ check_err $? "Failed to get linecard $lc device $device running fw version"
+ log_info "Linecard $lc device $device running.fw: \"$running_device_fw\""
+ done
+}
+
+activation_16x100G_test()
+{
+ RET=0
+ local lc
+ local type
+ local state
+
+ lc=$LC_SLOT
+ type=$LC_16X100G_TYPE
+
+ unprovision_one $lc
+ provision_one $lc $type
+ state=$(lc_wait_until_state_becomes $lc "active" \
+ $ACTIVATION_TIMEOUT)
+ check_err $? "Failed to get linecard $lc activated (timeout)"
+
+ lc_devices_info_check $lc $LC_16X100G_DEVICE_COUNT
+
+ interface_check
+
+ log_test "Activation 16x100G"
+}
+
+setup_prepare()
+{
+ local lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length")
+ if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then
+ echo "SKIP: No linecard support found"
+ exit $ksft_skip
+ fi
+
+ if [ -z "$LC_SLOT" ]; then
+ echo "SKIP: \"LC_SLOT\" variable not provided"
+ exit $ksft_skip
+ fi
+
+ # Interfaces are not present during the script start,
+ # that's why we define NUM_NETIFS here so dummy
+ # implicit veth pairs are not created.
+ NUM_NETIFS=2
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ ifaces_upped=false
+}
+
+cleanup()
+{
+ if [ "$ifaces_upped" = true ] ; then
+ ip link set $h1 down
+ ip link set $h2 down
+ fi
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
new file mode 100755
index 000000000000..82a47b903f92
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
@@ -0,0 +1,480 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sends 1Gbps of traffic through the switch, into which it then
+# injects a burst of traffic and tests that there are no drops.
+#
+# The 1Gbps stream is created by sending >1Gbps stream from H1. This stream
+# ingresses through $swp1, and is forwarded thtrough a small temporary pool to a
+# 1Gbps $swp3.
+#
+# Thus a 1Gbps stream enters $swp4, and is forwarded through a large pool to
+# $swp2, and eventually to H2. Since $swp2 is a 1Gbps port as well, no backlog
+# is generated.
+#
+# At this point, a burst of traffic is forwarded from H3. This enters $swp5, is
+# forwarded to $swp2, which is fully subscribed by the 1Gbps stream. The
+# expectation is that the burst is wholly absorbed by the large pool and no
+# drops are caused. After the burst, there should be a backlog that is hard to
+# get rid of, because $sw2 is fully subscribed. But because each individual
+# packet is scheduled soon after getting enqueued, SLL and HLL do not impact the
+# test.
+#
+# +-----------------------+ +-----------------------+
+# | H1 | | H3 |
+# | + $h1.111 | | $h3.111 + |
+# | | 192.0.2.33/28 | | 192.0.2.35/28 | |
+# | | | | | |
+# | + $h1 | | $h3 + |
+# +---|-------------------+ +--------------------+ +------------------|----+
+# | | | |
+# +---|----------------------|--------------------|----------------------|----+
+# | + $swp1 $swp3 + + $swp4 $swp5 | |
+# | | iPOOL1 iPOOL0 | | iPOOL2 iPOOL2 | |
+# | | ePOOL4 ePOOL5 | | ePOOL4 ePOOL4 | |
+# | | 1Gbps | | 1Gbps | |
+# | +-|----------------------|-+ +-|----------------------|-+ |
+# | | + $swp1.111 $swp3.111 + | | + $swp4.111 $swp5.111 + | |
+# | | | | | |
+# | | BR1 | | BR2 | |
+# | | | | | |
+# | | | | + $swp2.111 | |
+# | +--------------------------+ +---------|----------------+ |
+# | | |
+# | iPOOL0: 500KB dynamic | |
+# | iPOOL1: 500KB dynamic | |
+# | iPOOL2: 10MB dynamic + $swp2 |
+# | ePOOL4: 500KB dynamic | iPOOL0 |
+# | ePOOL5: 500KB dnamic | ePOOL6 |
+# | ePOOL6: 10MB dynamic | 1Gbps |
+# +-------------------------------------------------------|-------------------+
+# |
+# +---|-------------------+
+# | + $h2 H2 |
+# | | 1Gbps |
+# | | |
+# | + $h2.111 |
+# | 192.0.2.34/28 |
+# +-----------------------+
+#
+# iPOOL0+ePOOL4 are helper pools for control traffic etc.
+# iPOOL1+ePOOL5 are helper pools for modeling the 1Gbps stream
+# iPOOL2+ePOOL6 are pools for soaking the burst traffic
+
+ALL_TESTS="
+ ping_ipv4
+ test_8K
+ test_800
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=8
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+source mlxsw_lib.sh
+
+_1KB=1000
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+
+# The failure mode that this specifically tests is exhaustion of descriptor
+# buffer. The point is to produce a burst that shared buffer should be able
+# to accommodate, but produce it with small enough packets that the machine
+# runs out of the descriptor buffer space with default configuration.
+#
+# The machine therefore needs to be able to produce line rate with as small
+# packets as possible, and at the same time have large enough buffer that
+# when filled with these small packets, it runs out of descriptors.
+# Spectrum-2 is very close, but cannot perform this test. Therefore use
+# Spectrum-3 as a minimum, and permit larger burst size, and therefore
+# larger packets, to reduce spurious failures.
+#
+mlxsw_only_on_spectrum 3+ || exit
+
+BURST_SIZE=$((50000000))
+POOL_SIZE=$BURST_SIZE
+
+h1_create()
+{
+ simple_if_init $h1
+ mtu_set $h1 10000
+
+ vlan_create $h1 111 v$h1 192.0.2.33/28
+ ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 111
+
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ mtu_set $h2 10000
+ ethtool -s $h2 speed 1000 autoneg off
+
+ vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 111
+
+ ethtool -s $h2 autoneg on
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ mtu_set $h3 10000
+
+ vlan_create $h3 111 v$h3 192.0.2.35/28
+}
+
+h3_destroy()
+{
+ vlan_destroy $h3 111
+
+ mtu_restore $h3
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ # pools
+ # -----
+
+ devlink_pool_size_thtype_save 0
+ devlink_pool_size_thtype_save 4
+ devlink_pool_size_thtype_save 1
+ devlink_pool_size_thtype_save 5
+ devlink_pool_size_thtype_save 2
+ devlink_pool_size_thtype_save 6
+
+ devlink_port_pool_th_save $swp1 1
+ devlink_port_pool_th_save $swp2 6
+ devlink_port_pool_th_save $swp3 5
+ devlink_port_pool_th_save $swp4 2
+ devlink_port_pool_th_save $swp5 2
+
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
+ devlink_tc_bind_pool_th_save $swp2 1 egress
+ devlink_tc_bind_pool_th_save $swp3 1 egress
+ devlink_tc_bind_pool_th_save $swp4 1 ingress
+ devlink_tc_bind_pool_th_save $swp5 1 ingress
+
+ # Control traffic pools. Just reduce the size.
+ devlink_pool_size_thtype_set 0 dynamic $_500KB
+ devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+ # Stream modeling pools.
+ devlink_pool_size_thtype_set 1 dynamic $_500KB
+ devlink_pool_size_thtype_set 5 dynamic $_500KB
+
+ # Burst soak pools.
+ devlink_pool_size_thtype_set 2 static $POOL_SIZE
+ devlink_pool_size_thtype_set 6 static $POOL_SIZE
+
+ # $swp1
+ # -----
+
+ ip link set dev $swp1 up
+ mtu_set $swp1 10000
+ vlan_create $swp1 111
+ ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp1 1 16
+ devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
+
+ # Configure qdisc...
+ tc qdisc replace dev $swp1 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ # ... so that we can assign prio1 traffic to PG1.
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+ # $swp2
+ # -----
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 10000
+ ethtool -s $swp2 speed 1000 autoneg off
+ vlan_create $swp2 111
+ ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp2 6 $POOL_SIZE
+ devlink_tc_bind_pool_th_set $swp2 1 egress 6 $POOL_SIZE
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6)
+ tc qdisc replace dev $swp2 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+
+ # $swp3
+ # -----
+
+ ip link set dev $swp3 up
+ mtu_set $swp3 10000
+ ethtool -s $swp3 speed 1000 autoneg off
+ vlan_create $swp3 111
+ ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp3 5 16
+ devlink_tc_bind_pool_th_set $swp3 1 egress 5 16
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6)
+ tc qdisc replace dev $swp3 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+
+ # $swp4
+ # -----
+
+ ip link set dev $swp4 up
+ mtu_set $swp4 10000
+ ethtool -s $swp4 speed 1000 autoneg off
+ vlan_create $swp4 111
+ ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp4 2 $POOL_SIZE
+ devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $POOL_SIZE
+
+ # Configure qdisc...
+ tc qdisc replace dev $swp4 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ # ... so that we can assign prio1 traffic to PG1.
+ dcb buffer set dev $swp4 prio-buffer all:0 1:1
+
+ # $swp5
+ # -----
+
+ ip link set dev $swp5 up
+ mtu_set $swp5 10000
+ vlan_create $swp5 111
+ ip link set dev $swp5.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp5 2 $POOL_SIZE
+ devlink_tc_bind_pool_th_set $swp5 1 ingress 2 $POOL_SIZE
+
+ # Configure qdisc...
+ tc qdisc replace dev $swp5 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ # ... so that we can assign prio1 traffic to PG1.
+ dcb buffer set dev $swp5 prio-buffer all:0 1:1
+
+ # bridges
+ # -------
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev $swp1.111 master br1
+ ip link set dev $swp3.111 master br1
+ ip link set dev br1 up
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev $swp2.111 master br2
+ ip link set dev $swp4.111 master br2
+ ip link set dev $swp5.111 master br2
+ ip link set dev br2 up
+}
+
+switch_destroy()
+{
+ # Do this first so that we can reset the limits to values that are only
+ # valid for the original static / dynamic setting.
+ devlink_pool_size_thtype_restore 6
+ devlink_pool_size_thtype_restore 5
+ devlink_pool_size_thtype_restore 4
+ devlink_pool_size_thtype_restore 2
+ devlink_pool_size_thtype_restore 1
+ devlink_pool_size_thtype_restore 0
+
+ # bridges
+ # -------
+
+ ip link set dev br2 down
+ ip link set dev $swp5.111 nomaster
+ ip link set dev $swp4.111 nomaster
+ ip link set dev $swp2.111 nomaster
+ ip link del dev br2
+
+ ip link set dev br1 down
+ ip link set dev $swp3.111 nomaster
+ ip link set dev $swp1.111 nomaster
+ ip link del dev br1
+
+ # $swp5
+ # -----
+
+ dcb buffer set dev $swp5 prio-buffer all:0
+ tc qdisc del dev $swp5 root
+
+ devlink_tc_bind_pool_th_restore $swp5 1 ingress
+ devlink_port_pool_th_restore $swp5 2
+
+ vlan_destroy $swp5 111
+ mtu_restore $swp5
+ ip link set dev $swp5 down
+
+ # $swp4
+ # -----
+
+ dcb buffer set dev $swp4 prio-buffer all:0
+ tc qdisc del dev $swp4 root
+
+ devlink_tc_bind_pool_th_restore $swp4 1 ingress
+ devlink_port_pool_th_restore $swp4 2
+
+ vlan_destroy $swp4 111
+ ethtool -s $swp4 autoneg on
+ mtu_restore $swp4
+ ip link set dev $swp4 down
+
+ # $swp3
+ # -----
+
+ tc qdisc del dev $swp3 root
+
+ devlink_tc_bind_pool_th_restore $swp3 1 egress
+ devlink_port_pool_th_restore $swp3 5
+
+ vlan_destroy $swp3 111
+ ethtool -s $swp3 autoneg on
+ mtu_restore $swp3
+ ip link set dev $swp3 down
+
+ # $swp2
+ # -----
+
+ tc qdisc del dev $swp2 root
+
+ devlink_tc_bind_pool_th_restore $swp2 1 egress
+ devlink_port_pool_th_restore $swp2 6
+
+ vlan_destroy $swp2 111
+ ethtool -s $swp2 autoneg on
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ # $swp1
+ # -----
+
+ dcb buffer set dev $swp1 prio-buffer all:0
+ tc qdisc del dev $swp1 root
+
+ devlink_tc_bind_pool_th_restore $swp1 1 ingress
+ devlink_port_pool_th_restore $swp1 1
+
+ vlan_destroy $swp1 111
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ swp4=${NETIFS[p6]}
+
+ swp5=${NETIFS[p7]}
+ h3=${NETIFS[p8]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.34 " h1->h2"
+ ping_test $h3 192.0.2.34 " h3->h2"
+}
+
+__test_qos_burst()
+{
+ local pktsize=$1; shift
+
+ RET=0
+
+ start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
+ sleep 1
+
+ local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+ ((q0 == 0))
+ check_err $? "Transmit queue non-zero?"
+
+ local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+
+ local cell_size=$(devlink_cell_size_get)
+ local cells=$((BURST_SIZE / cell_size))
+ # Each packet is $pktsize of payload + headers.
+ local pkt_cells=$(((pktsize + 50 + cell_size - 1) / cell_size))
+ # How many packets can we admit:
+ local pkts=$((cells / pkt_cells))
+
+ $MZ $h3 -p $pktsize -Q 1:111 -A 192.0.2.35 -B 192.0.2.34 \
+ -a own -b $h2mac -c $pkts -t udp -q
+ sleep 1
+
+ local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+ ((d1 == d0))
+ check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
+
+ # Check that the queue is somewhat close to the burst size This
+ # makes sure that the lack of drops above was not due to port
+ # undersubscribtion.
+ local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+ local qe=$((90 * BURST_SIZE / 100))
+ ((q0 > qe))
+ check_err $? "Queue size expected >$qe, got $q0"
+
+ stop_traffic
+ sleep 2
+
+ log_test "Burst: absorb $pkts ${pktsize}-B packets"
+}
+
+test_8K()
+{
+ __test_qos_burst 8000
+}
+
+test_800()
+{
+ __test_qos_burst 800
+}
+
+bail_on_lldpad
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index f4493ef9cca1..3569ff45f7d5 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -371,9 +371,9 @@ test_tc_int_buf()
tc qdisc delete dev $swp root
}
-trap cleanup EXIT
-
bail_on_lldpad
+
+trap cleanup EXIT
setup_wait
tests_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5d5622fc2758..f9858e221996 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -393,9 +393,9 @@ test_qos_pfc()
log_test "PFC"
}
-trap cleanup EXIT
-
bail_on_lldpad
+
+trap cleanup EXIT
setup_prepare
setup_wait
tests_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 1e5ad3209436..7a73057206cd 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -166,12 +166,11 @@ ecn_mirror_test()
uninstall_qdisc
}
-trap cleanup EXIT
+bail_on_lldpad
+trap cleanup EXIT
setup_prepare
setup_wait
-
-bail_on_lldpad
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index d79a82f317d2..501d192529ac 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -73,12 +73,11 @@ red_mirror_test()
uninstall_qdisc
}
-trap cleanup EXIT
+bail_on_lldpad
+trap cleanup EXIT
setup_prepare
setup_wait
-
-bail_on_lldpad
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
new file mode 100755
index 000000000000..c51c83421c61
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 NXP
+
+# The script is mostly generic, with the exception of the
+# ethtool per-TC counter names ("rx_green_prio_${tc}")
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h1_vlan_create()
+{
+ local vid=$1
+
+ vlan_create $h1 $vid
+ simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ ip link set $h1.$vid type vlan \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+ ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h1_vlan_destroy()
+{
+ local vid=$1
+
+ simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ vlan_destroy $h1 $vid
+}
+
+h2_vlan_create()
+{
+ local vid=$1
+
+ vlan_create $h2 $vid
+ simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+ ip link set $h2.$vid type vlan \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+ ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h2_vlan_destroy()
+{
+ local vid=$1
+
+ simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+ vlan_destroy $h2 $vid
+}
+
+vlans_prepare()
+{
+ h1_vlan_create 100
+ h2_vlan_create 100
+
+ tc qdisc add dev ${h1}.100 clsact
+ tc filter add dev ${h1}.100 egress protocol ipv4 \
+ flower ip_proto icmp action skbedit priority 3
+ tc filter add dev ${h1}.100 egress protocol ipv6 \
+ flower ip_proto icmpv6 action skbedit priority 3
+}
+
+vlans_destroy()
+{
+ tc qdisc del dev ${h1}.100 clsact
+
+ h1_vlan_destroy 100
+ h2_vlan_destroy 100
+}
+
+switch_create()
+{
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+
+ # Ports should trust VLAN PCP even with vlan_filtering=0
+ ip link add br0 type bridge
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+dscp_cs_to_tos()
+{
+ local dscp_cs=$1
+
+ # https://datatracker.ietf.org/doc/html/rfc2474
+ # 4.2.2.1 The Class Selector Codepoints
+ echo $((${dscp_cs} << 5))
+}
+
+run_test()
+{
+ local test_name=$1; shift
+ local if_name=$1; shift
+ local tc=$1; shift
+ local tos=$1; shift
+ local counter_name="rx_green_prio_${tc}"
+ local ipv4_before
+ local ipv4_after
+ local ipv6_before
+ local ipv6_after
+
+ ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+ ping_do ${if_name} $H2_IPV4 "-Q ${tos}"
+ ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+ if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ log_test "IPv4 ${test_name}"
+
+ ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+ ping_do ${if_name} $H2_IPV6 "-Q ${tos}"
+ ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+ if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ log_test "IPv6 ${test_name}"
+}
+
+port_default_prio_get()
+{
+ local if_name=$1
+ local prio
+
+ prio="$(dcb -j app show dev ${if_name} default-prio | \
+ jq '.default_prio[]')"
+ if [ -z "${prio}" ]; then
+ prio=0
+ fi
+
+ echo ${prio}
+}
+
+test_port_default()
+{
+ local orig=$(port_default_prio_get ${swp1})
+ local dmac=$(mac_get ${h2})
+
+ dcb app replace dev ${swp1} default-prio 5
+
+ run_test "Port-default QoS classification" ${h1} 5 0
+
+ dcb app replace dev ${swp1} default-prio ${orig}
+}
+
+test_vlan_pcp()
+{
+ vlans_prepare
+
+ run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0
+
+ vlans_destroy
+}
+
+test_ip_dscp()
+{
+ local port_default=$(port_default_prio_get ${swp1})
+ local tos=$(dscp_cs_to_tos 4)
+
+ dcb app add dev ${swp1} dscp-prio CS4:4
+ run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos}
+ dcb app del dev ${swp1} dscp-prio CS4:4
+
+ vlans_prepare
+ run_test "Untrusted DSCP QoS classification follows VLAN PCP" \
+ ${h1}.100 3 ${tos}
+ vlans_destroy
+
+ run_test "Untrusted DSCP QoS classification follows port default" \
+ ${h1} ${port_default} ${tos}
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_port_default
+ test_vlan_pcp
+ test_ip_dscp
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
new file mode 100755
index 000000000000..5a5cee92c665
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+# Note: On LS1028A, in lack of enough user ports, this setup requires patching
+# the device tree to use the second CPU port as a user port
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/tsn_lib.sh
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+# Tunables
+NUM_PKTS=1000
+STREAM_VID=100
+STREAM_PRIO=6
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2))
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Chain number exported by the ocelot driver for
+# Per-Stream Filtering and Policing filters
+PSFP()
+{
+ echo 30000
+}
+
+psfp_chain_create()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+
+ tc filter add dev $if_name ingress chain 0 pref 49152 flower \
+ skip_sw action goto chain $(PSFP)
+}
+
+psfp_chain_destroy()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+}
+
+psfp_filter_check()
+{
+ local expected=$1
+ local packets=""
+ local drops=""
+ local stats=""
+
+ stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1)
+ packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets")
+ drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops")
+
+ if ! [ "${packets}" = "${expected}" ]; then
+ printf "Expected filter to match on %d packets but matched on %d instead\n" \
+ "${expected}" "${packets}"
+ fi
+
+ echo "Hardware filter reports ${drops} drops"
+}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+
+ bridge vlan add dev ${swp2} vid ${STREAM_VID}
+ bridge vlan add dev ${swp1} vid ${STREAM_VID}
+ # PSFP on Ocelot requires the filter to also be added to the bridge
+ # FDB, and not be removed
+ bridge fdb add dev ${swp2} \
+ ${h2_mac_addr} vlan ${STREAM_VID} static master
+
+ psfp_chain_create ${swp1}
+
+ tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \
+ protocol 802.1Q flower skip_sw \
+ dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \
+ action gate base-time 0.000000000 \
+ sched-entry OPEN ${GATE_DURATION_NS} -1 -1 \
+ sched-entry CLOSE ${GATE_DURATION_NS} -1 -1
+}
+
+switch_destroy()
+{
+ psfp_chain_destroy ${swp1}
+ ip link del br0
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev ${if_name} clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev ${if_name} egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev ${if_name} egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \
+ clockid CLOCK_TAI offload delta ${FUDGE_FACTOR}
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev ${if_name} root
+ tc qdisc del dev ${if_name} clsact
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup ${h1}
+
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start ${swp1} ${UDS_ADDRESS_SWP1}
+
+ # Assumption true for LS1028A: h1 and h2 use the same PHC. So by
+ # synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized
+ # to swp1 (and both to CLOCK_REALTIME).
+ ptp4l_start ${h1} true ${UDS_ADDRESS_H1}
+ ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1}
+
+ # Make sure there are no filter matches at the beginning of the test
+ psfp_filter_check 0
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ptp4l_stop ${swp1}
+ ptp4l_stop ${h1}
+ phc2sys_stop
+ isochron_recv_stop
+
+ txtime_cleanup ${h1}
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+debug_incorrectly_dropped_packets()
+{
+ local isochron_dat=$1
+ local dropped_seqids
+ local seqid
+
+ echo "Packets incorrectly dropped:"
+
+ dropped_seqids=$(isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u RX hw %T\n" \
+ --printf-args "qR" | \
+ grep 'RX hw 0.000000000' | \
+ awk '{print $1}')
+
+ for seqid in ${dropped_seqids}; do
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --start ${seqid} --stop ${seqid} \
+ --printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \
+ --printf-args "qST"
+ done
+}
+
+debug_incorrectly_received_packets()
+{
+ local isochron_dat=$1
+
+ echo "Packets incorrectly received:"
+
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \
+ --printf-args "qSTR" |
+ grep -v 'HW RX timestamp 0.000000000'
+}
+
+run_test()
+{
+ local base_time=$1
+ local expected=$2
+ local test_name=$3
+ local debug=$4
+ local isochron_dat="$(mktemp)"
+ local extra_args=""
+ local received
+
+ isochron_do \
+ "${h1}" \
+ "${h2}" \
+ "${UDS_ADDRESS_H1}" \
+ "" \
+ "${base_time}" \
+ "${CYCLE_TIME_NS}" \
+ "${SHIFT_TIME_NS}" \
+ "${NUM_PKTS}" \
+ "${STREAM_VID}" \
+ "${STREAM_PRIO}" \
+ "" \
+ "${isochron_dat}"
+
+ # Count all received packets by looking at the non-zero RX timestamps
+ received=$(isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u\n" --printf-args "R" | \
+ grep -w -v '0' | wc -l)
+
+ if [ "${received}" = "${expected}" ]; then
+ RET=0
+ else
+ RET=1
+ echo "Expected isochron to receive ${expected} packets but received ${received}"
+ fi
+
+ log_test "${test_name}"
+
+ if [ "$RET" = "1" ]; then
+ ${debug} "${isochron_dat}"
+ fi
+
+ rm ${isochron_dat} 2> /dev/null
+}
+
+test_gate_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 ${NUM_PKTS} "In band" \
+ debug_incorrectly_dropped_packets
+
+ psfp_filter_check ${NUM_PKTS}
+}
+
+test_gate_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 0 "Out of band" \
+ debug_incorrectly_received_packets
+
+ psfp_filter_check $((2 * ${NUM_PKTS}))
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_gate_in_band
+ test_gate_out_of_band
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index 10e54bcca7a9..4401a654c2c0 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -215,15 +215,15 @@ test_vlan_pop()
sleep 1
- tcpdump_stop
+ tcpdump_stop $eth2
- if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
+ if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
echo "OK"
else
echo "FAIL"
fi
- tcpdump_cleanup
+ tcpdump_cleanup $eth2
}
test_vlan_push()
@@ -236,15 +236,15 @@ test_vlan_push()
sleep 1
- tcpdump_stop
+ tcpdump_stop $eth3.100
- if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then
+ if tcpdump_show $eth3.100 | grep -q "$eth2_mac > $eth3_mac"; then
echo "OK"
else
echo "FAIL"
fi
- tcpdump_cleanup
+ tcpdump_cleanup $eth3.100
}
test_vlan_ingress_modify()
@@ -267,15 +267,15 @@ test_vlan_ingress_modify()
sleep 1
- tcpdump_stop
+ tcpdump_stop $eth2
- if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+ if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
echo "OK"
else
echo "FAIL"
fi
- tcpdump_cleanup
+ tcpdump_cleanup $eth2
tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
@@ -305,15 +305,15 @@ test_vlan_egress_modify()
sleep 1
- tcpdump_stop
+ tcpdump_stop $eth2
- if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+ if tcpdump_show $eth2 | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
echo "OK"
else
echo "FAIL"
fi
- tcpdump_cleanup
+ tcpdump_cleanup $eth2
tc filter del dev $eth1 egress chain $(ES0) pref 3
tc qdisc del dev $eth1 clsact
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 0f2ebc38d893..0fbdacfdcd6a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -36,6 +36,7 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
+TEST_PROGS += ndisc_unsolicited_na_test.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 4f70baad867d..bbe3b379927a 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -20,6 +20,7 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
+TESTS="fib_rule6 fib_rule4"
log_test()
{
@@ -316,7 +317,16 @@ fi
# start clean
cleanup &> /dev/null
setup
-run_fibrule_tests
+for t in $TESTS
+do
+ case $t in
+ fib_rule6_test|fib_rule6) fib_rule6_test;;
+ fib_rule4_test|fib_rule4) fib_rule4_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+
+ esac
+done
cleanup
if [ "$TESTS" != "none" ]; then
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index c87e674b61b1..0912f5ae7f6b 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,6 +2,7 @@
TEST_PROGS = bridge_igmp.sh \
bridge_locked_port.sh \
+ bridge_mdb.sh \
bridge_mld.sh \
bridge_port_isolation.sh \
bridge_sticky_fdb.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
new file mode 100755
index 000000000000..b1ba6876dd86
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that adding host mdb entries work as intended for all types of
+# multicast filters: ipv4, ipv6, and mac
+
+ALL_TESTS="mdb_add_del_test"
+NUM_NETIFS=2
+
+TEST_GROUP_IP4="225.1.2.3"
+TEST_GROUP_IP6="ff02::42"
+TEST_GROUP_MAC="01:00:01:c0:ff:ee"
+
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Enable multicast filtering
+ ip link add dev br0 type bridge mcast_snooping 1
+
+ ip link set dev $swp1 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp1 down
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+do_mdb_add_del()
+{
+ local group=$1
+ local flag=$2
+
+ RET=0
+ bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null
+ check_err $? "Failed adding $group to br0, port br0"
+
+ if [ -z "$flag" ]; then
+ flag="temp"
+ fi
+
+ bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null
+ check_err $? "$group not added with $flag flag"
+
+ bridge mdb del dev br0 port br0 grp $group 2>/dev/null
+ check_err $? "Failed deleting $group from br0, port br0"
+
+ bridge mdb show dev br0 | grep -q $group >/dev/null
+ check_err_fail 1 $? "$group still in mdb after delete"
+
+ log_test "MDB add/del group $group to bridge port br0"
+}
+
+mdb_add_del_test()
+{
+ do_mdb_add_del $TEST_GROUP_MAC permanent
+ do_mdb_add_del $TEST_GROUP_IP4
+ do_mdb_add_del $TEST_GROUP_IP6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 664b9ecaf228..66681a2bcdd3 100644..100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -27,6 +27,9 @@ INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
REQUIRE_JQ=${REQUIRE_JQ:=yes}
REQUIRE_MZ=${REQUIRE_MZ:=yes}
+REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
+STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
+TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -159,6 +162,12 @@ fi
if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
+if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
+ # https://github.com/vladimiroltean/mtools/
+ # patched for IPv6 support
+ require_command msend
+ require_command mreceive
+fi
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""
@@ -214,10 +223,41 @@ create_netif()
esac
}
+declare -A MAC_ADDR_ORIG
+mac_addr_prepare()
+{
+ local new_addr=
+ local dev=
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ dev=${NETIFS[p$i]}
+ new_addr=$(printf "00:01:02:03:04:%02x" $i)
+
+ MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
+ # Strip quotes
+ MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
+ ip link set dev $dev address $new_addr
+ done
+}
+
+mac_addr_restore()
+{
+ local dev=
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ dev=${NETIFS[p$i]}
+ ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
+ done
+}
+
if [[ "$NETIF_CREATE" = "yes" ]]; then
create_netif
fi
+if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+ mac_addr_prepare
+fi
+
for ((i = 1; i <= NUM_NETIFS; ++i)); do
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
@@ -503,6 +543,10 @@ pre_cleanup()
echo "Pausing before cleanup, hit any key to continue"
read
fi
+
+ if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+ mac_addr_restore
+ fi
}
vrf_prepare()
@@ -824,6 +868,15 @@ mac_get()
ip -j link show dev $if_name | jq -r '.[]["address"]'
}
+ipv6_lladdr_get()
+{
+ local if_name=$1
+
+ ip -j addr show dev $if_name | \
+ jq -r '.[]["addr_info"][] | select(.scope == "link").local' | \
+ head -1
+}
+
bridge_ageing_time_get()
{
local bridge=$1
@@ -1322,25 +1375,40 @@ flood_test()
__start_traffic()
{
+ local pktsize=$1; shift
local proto=$1; shift
local h_in=$1; shift # Where the traffic egresses the host
local sip=$1; shift
local dip=$1; shift
local dmac=$1; shift
- $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+ $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
-a own -b $dmac -t "$proto" -q "$@" &
sleep 1
}
+start_traffic_pktsize()
+{
+ local pktsize=$1; shift
+
+ __start_traffic $pktsize udp "$@"
+}
+
+start_tcp_traffic_pktsize()
+{
+ local pktsize=$1; shift
+
+ __start_traffic $pktsize tcp "$@"
+}
+
start_traffic()
{
- __start_traffic udp "$@"
+ start_traffic_pktsize 8000 "$@"
}
start_tcp_traffic()
{
- __start_traffic tcp "$@"
+ start_tcp_traffic_pktsize 8000 "$@"
}
stop_traffic()
@@ -1349,13 +1417,17 @@ stop_traffic()
{ kill %% && wait %%; } 2>/dev/null
}
+declare -A cappid
+declare -A capfile
+declare -A capout
+
tcpdump_start()
{
local if_name=$1; shift
local ns=$1; shift
- capfile=$(mktemp)
- capout=$(mktemp)
+ capfile[$if_name]=$(mktemp)
+ capout[$if_name]=$(mktemp)
if [ -z $ns ]; then
ns_cmd=""
@@ -1369,27 +1441,35 @@ tcpdump_start()
capuser="-Z $SUDO_USER"
fi
- $ns_cmd tcpdump -e -n -Q in -i $if_name \
- -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
- cappid=$!
+ $ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
+ -s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
+ > "${capout[$if_name]}" 2>&1 &
+ cappid[$if_name]=$!
sleep 1
}
tcpdump_stop()
{
- $ns_cmd kill $cappid
+ local if_name=$1
+ local pid=${cappid[$if_name]}
+
+ $ns_cmd kill "$pid" && wait "$pid"
sleep 1
}
tcpdump_cleanup()
{
- rm $capfile $capout
+ local if_name=$1
+
+ rm ${capfile[$if_name]} ${capout[$if_name]}
}
tcpdump_show()
{
- tcpdump -e -n -r $capfile 2>&1
+ local if_name=$1
+
+ tcpdump -e -n -r ${capfile[$if_name]} 2>&1
}
# return 0 if the packet wasn't seen on host2_if or 1 if it was
@@ -1499,6 +1579,37 @@ brmcast_check_sg_state()
done
}
+mc_join()
+{
+ local if_name=$1
+ local group=$2
+ local vrf_name=$(master_name_get $if_name)
+
+ # We don't care about actual reception, just about joining the
+ # IP multicast group and adding the L2 address to the device's
+ # MAC filtering table
+ ip vrf exec $vrf_name \
+ mreceive -g $group -I $if_name > /dev/null 2>&1 &
+ mreceive_pid=$!
+
+ sleep 1
+}
+
+mc_leave()
+{
+ kill "$mreceive_pid" && wait "$mreceive_pid"
+}
+
+mc_send()
+{
+ local if_name=$1
+ local groups=$2
+ local vrf_name=$(master_name_get $if_name)
+
+ ip vrf exec $vrf_name \
+ msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
+}
+
start_ip_monitor()
{
local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
new file mode 100755
index 000000000000..c5b0cbc85b3e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone bridge"
+NUM_NETIFS=2
+PING_COUNT=1
+REQUIRE_MTOOLS=yes
+REQUIRE_MZ=no
+
+source lib.sh
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+BRIDGE_ADDR="00:00:de:ad:be:ee"
+MACVLAN_ADDR="00:00:de:ad:be:ef"
+UNKNOWN_UC_ADDR1="de:ad:be:ef:ee:03"
+UNKNOWN_UC_ADDR2="de:ad:be:ef:ee:04"
+UNKNOWN_UC_ADDR3="de:ad:be:ef:ee:05"
+JOINED_IPV4_MC_ADDR="225.1.2.3"
+UNKNOWN_IPV4_MC_ADDR1="225.1.2.4"
+UNKNOWN_IPV4_MC_ADDR2="225.1.2.5"
+UNKNOWN_IPV4_MC_ADDR3="225.1.2.6"
+JOINED_IPV6_MC_ADDR="ff2e::0102:0304"
+UNKNOWN_IPV6_MC_ADDR1="ff2e::0102:0305"
+UNKNOWN_IPV6_MC_ADDR2="ff2e::0102:0306"
+UNKNOWN_IPV6_MC_ADDR3="ff2e::0102:0307"
+
+JOINED_MACV4_MC_ADDR="01:00:5e:01:02:03"
+UNKNOWN_MACV4_MC_ADDR1="01:00:5e:01:02:04"
+UNKNOWN_MACV4_MC_ADDR2="01:00:5e:01:02:05"
+UNKNOWN_MACV4_MC_ADDR3="01:00:5e:01:02:06"
+JOINED_MACV6_MC_ADDR="33:33:01:02:03:04"
+UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
+UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
+UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
+
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# Disable promisc to ensure we don't receive unknown MAC DA packets
+export TCPDUMP_EXTRA_FLAGS="-pl"
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+send_non_ip()
+{
+ local if_name=$1
+ local smac=$2
+ local dmac=$3
+
+ $MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+ local if_name=$1
+ local dmac=$2
+
+ ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+ ping_do $if_name $H2_IPV4
+ ip neigh del $H2_IPV4 dev $if_name
+}
+
+check_rcv()
+{
+ local if_name=$1
+ local type=$2
+ local pattern=$3
+ local should_receive=$4
+ local should_fail=
+
+ [ $should_receive = true ] && should_fail=0 || should_fail=1
+ RET=0
+
+ tcpdump_show $if_name | grep -q "$pattern"
+
+ check_err_fail "$should_fail" "$?" "reception"
+
+ log_test "$if_name: $type"
+}
+
+mc_route_prepare()
+{
+ local if_name=$1
+ local vrf_name=$(master_name_get $if_name)
+
+ ip route add 225.100.1.0/24 dev $if_name vrf $vrf_name
+ ip -6 route add ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+mc_route_destroy()
+{
+ local if_name=$1
+ local vrf_name=$(master_name_get $if_name)
+
+ ip route del 225.100.1.0/24 dev $if_name vrf $vrf_name
+ ip -6 route del ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+run_test()
+{
+ local rcv_if_name=$1
+ local smac=$(mac_get $h1)
+ local rcv_dmac=$(mac_get $rcv_if_name)
+
+ tcpdump_start $rcv_if_name
+
+ mc_route_prepare $h1
+ mc_route_prepare $rcv_if_name
+
+ send_uc_ipv4 $h1 $rcv_dmac
+ send_uc_ipv4 $h1 $MACVLAN_ADDR
+ send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+
+ ip link set dev $rcv_if_name promisc on
+ send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
+ mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
+ mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+ ip link set dev $rcv_if_name promisc off
+
+ mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
+ mc_send $h1 $JOINED_IPV4_MC_ADDR
+ mc_leave
+
+ mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
+ mc_send $h1 $JOINED_IPV6_MC_ADDR
+ mc_leave
+
+ mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
+ mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+
+ ip link set dev $rcv_if_name allmulticast on
+ send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
+ mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
+ mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+ ip link set dev $rcv_if_name allmulticast off
+
+ mc_route_destroy $rcv_if_name
+ mc_route_destroy $h1
+
+ sleep 1
+
+ tcpdump_stop $rcv_if_name
+
+ check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
+ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
+ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
+
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
+ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ false
+
+ check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
+ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
+
+ check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
+ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+ false
+
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
+ true
+
+ tcpdump_cleanup $rcv_if_name
+}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_create()
+{
+ ip link add br0 type bridge
+ ip link set br0 address $BRIDGE_ADDR
+ ip link set br0 up
+
+ ip link set $h2 master br0
+ ip link set $h2 up
+
+ simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_destroy()
+{
+ simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+
+ ip link del br0
+}
+
+standalone()
+{
+ h1_create
+ h2_create
+
+ ip link add link $h2 name macvlan0 type macvlan mode private
+ ip link set macvlan0 address $MACVLAN_ADDR
+ ip link set macvlan0 up
+
+ run_test $h2
+
+ ip link del macvlan0
+
+ h2_destroy
+ h1_destroy
+}
+
+bridge()
+{
+ h1_create
+ bridge_create
+
+ ip link add link br0 name macvlan0 type macvlan mode private
+ ip link set macvlan0 address $MACVLAN_ADDR
+ ip link set macvlan0 up
+
+ run_test br0
+
+ ip link del macvlan0
+
+ bridge_destroy
+ h1_destroy
+}
+
+cleanup()
+{
+ pre_cleanup
+ vrf_cleanup
+}
+
+setup_prepare()
+{
+ vrf_prepare
+ # setup_wait() needs this
+ ip link set $h1 up
+ ip link set $h2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
new file mode 100755
index 000000000000..af3b398d13f0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -0,0 +1,261 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone two_bridges one_bridge_two_pvids"
+NUM_NETIFS=4
+
+source lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p3]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+IPV4_ALLNODES="224.0.0.1"
+IPV6_ALLNODES="ff02::1"
+MACV4_ALLNODES="01:00:5e:00:00:01"
+MACV6_ALLNODES="33:33:00:00:00:01"
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# The full 4K VLAN space is too much to check, so strategically pick some
+# values which should provide reasonable coverage
+vids=(0 1 2 5 10 20 50 100 200 500 1000 1000 2000 4000 4094)
+
+send_non_ip()
+{
+ local if_name=$1
+ local smac=$2
+ local dmac=$3
+
+ $MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+ local if_name=$1
+ local dmac=$2
+
+ ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+ ping_do $if_name $H2_IPV4
+ ip neigh del $H2_IPV4 dev $if_name
+}
+
+send_mc_ipv4()
+{
+ local if_name=$1
+
+ ping_do $if_name $IPV4_ALLNODES "-I $if_name"
+}
+
+send_uc_ipv6()
+{
+ local if_name=$1
+ local dmac=$2
+
+ ip -6 neigh add $H2_IPV6 lladdr $dmac dev $if_name
+ ping6_do $if_name $H2_IPV6
+ ip -6 neigh del $H2_IPV6 dev $if_name
+}
+
+send_mc_ipv6()
+{
+ local if_name=$1
+
+ ping6_do $if_name $IPV6_ALLNODES%$if_name
+}
+
+check_rcv()
+{
+ local if_name=$1
+ local type=$2
+ local pattern=$3
+ local should_fail=1
+
+ RET=0
+
+ tcpdump_show $if_name | grep -q "$pattern"
+
+ check_err_fail "$should_fail" "$?" "reception"
+
+ log_test "$type"
+}
+
+run_test()
+{
+ local test_name="$1"
+ local smac=$(mac_get $h1)
+ local dmac=$(mac_get $h2)
+ local h1_ipv6_lladdr=$(ipv6_lladdr_get $h1)
+ local vid=
+
+ echo "$test_name: Sending packets"
+
+ tcpdump_start $h2
+
+ send_non_ip $h1 $smac $dmac
+ send_non_ip $h1 $smac $NON_IP_MC
+ send_non_ip $h1 $smac $BC
+ send_uc_ipv4 $h1 $dmac
+ send_mc_ipv4 $h1
+ send_uc_ipv6 $h1 $dmac
+ send_mc_ipv6 $h1
+
+ for vid in "${vids[@]}"; do
+ vlan_create $h1 $vid
+ simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+
+ send_non_ip $h1.$vid $smac $dmac
+ send_non_ip $h1.$vid $smac $NON_IP_MC
+ send_non_ip $h1.$vid $smac $BC
+ send_uc_ipv4 $h1.$vid $dmac
+ send_mc_ipv4 $h1.$vid
+ send_uc_ipv6 $h1.$vid $dmac
+ send_mc_ipv6 $h1.$vid
+
+ simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ vlan_destroy $h1 $vid
+ done
+
+ sleep 1
+
+ echo "$test_name: Checking which packets were received"
+
+ tcpdump_stop $h2
+
+ check_rcv $h2 "$test_name: Unicast non-IP untagged" \
+ "$smac > $dmac, 802.3, length 4:"
+
+ check_rcv $h2 "$test_name: Multicast non-IP untagged" \
+ "$smac > $NON_IP_MC, 802.3, length 4:"
+
+ check_rcv $h2 "$test_name: Broadcast non-IP untagged" \
+ "$smac > $BC, 802.3, length 4:"
+
+ check_rcv $h2 "$test_name: Unicast IPv4 untagged" \
+ "$smac > $dmac, ethertype IPv4 (0x0800)"
+
+ check_rcv $h2 "$test_name: Multicast IPv4 untagged" \
+ "$smac > $MACV4_ALLNODES, ethertype IPv4 (0x0800).*: $H1_IPV4 > $IPV4_ALLNODES"
+
+ check_rcv $h2 "$test_name: Unicast IPv6 untagged" \
+ "$smac > $dmac, ethertype IPv6 (0x86dd).*8: $H1_IPV6 > $H2_IPV6"
+
+ check_rcv $h2 "$test_name: Multicast IPv6 untagged" \
+ "$smac > $MACV6_ALLNODES, ethertype IPv6 (0x86dd).*: $h1_ipv6_lladdr > $IPV6_ALLNODES"
+
+ for vid in "${vids[@]}"; do
+ check_rcv $h2 "$test_name: Unicast non-IP VID $vid" \
+ "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+ check_rcv $h2 "$test_name: Multicast non-IP VID $vid" \
+ "$smac > $NON_IP_MC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+ check_rcv $h2 "$test_name: Broadcast non-IP VID $vid" \
+ "$smac > $BC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+ check_rcv $h2 "$test_name: Unicast IPv4 VID $vid" \
+ "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $H2_IPV4"
+
+ check_rcv $h2 "$test_name: Multicast IPv4 VID $vid" \
+ "$smac > $MACV4_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $IPV4_ALLNODES"
+
+ check_rcv $h2 "$test_name: Unicast IPv6 VID $vid" \
+ "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $H1_IPV6 > $H2_IPV6"
+
+ check_rcv $h2 "$test_name: Multicast IPv6 VID $vid" \
+ "$smac > $MACV6_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $h1_ipv6_lladdr > $IPV6_ALLNODES"
+ done
+
+ tcpdump_cleanup $h2
+}
+
+standalone()
+{
+ run_test "Standalone switch ports"
+}
+
+two_bridges()
+{
+ ip link add br0 type bridge && ip link set br0 up
+ ip link add br1 type bridge && ip link set br1 up
+ ip link set $swp1 master br0
+ ip link set $swp2 master br1
+
+ run_test "Switch ports in different bridges"
+
+ ip link del br1
+ ip link del br0
+}
+
+one_bridge_two_pvids()
+{
+ ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set br0 up
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+ bridge vlan add dev $swp1 vid 2 pvid untagged
+
+ run_test "Switch ports in VLAN-aware bridge with different PVIDs"
+
+ ip link del br0
+}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ # we call simple_if_init from the test itself, but setup_wait expects
+ # that we call it from here, and waits until the interfaces are up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index 057f91b05098..b98ea9449b8b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1 + | | + $h2 |
+# | 192.0.2.2/24 | | | | 198.51.100.2/24 |
+# | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# | |
+# +-----------------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh
index a7306c7ac06d..865c9f7d8143 100755
--- a/tools/testing/selftests/net/forwarding/router_vid_1.sh
+++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh
@@ -1,7 +1,32 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6"
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.1 + | | + $h2.1 |
+# | 192.0.2.2/24 | | | | 198.51.100.2/24 |
+# | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | | | |
+# | $rp1.1 + + $rp2.1 |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# | |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
NUM_NETIFS=4
source lib.sh
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
new file mode 100644
index 000000000000..60a1423e8116
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -0,0 +1,235 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
+REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
+
+# Tunables
+UTC_TAI_OFFSET=37
+ISOCHRON_CPU=1
+
+if [[ "$REQUIRE_ISOCHRON" = "yes" ]]; then
+ # https://github.com/vladimiroltean/tsn-scripts
+ # WARNING: isochron versions pre-1.0 are unstable,
+ # always use the latest version
+ require_command isochron
+fi
+if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
+ require_command phc2sys
+ require_command ptp4l
+fi
+
+phc2sys_start()
+{
+ local if_name=$1
+ local uds_address=$2
+ local extra_args=""
+
+ if ! [ -z "${uds_address}" ]; then
+ extra_args="${extra_args} -z ${uds_address}"
+ fi
+
+ phc2sys_log="$(mktemp)"
+
+ chrt -f 10 phc2sys -m \
+ -c ${if_name} \
+ -s CLOCK_REALTIME \
+ -O ${UTC_TAI_OFFSET} \
+ --step_threshold 0.00002 \
+ --first_step_threshold 0.00002 \
+ ${extra_args} \
+ > "${phc2sys_log}" 2>&1 &
+ phc2sys_pid=$!
+
+ echo "phc2sys logs to ${phc2sys_log} and has pid ${phc2sys_pid}"
+
+ sleep 1
+}
+
+phc2sys_stop()
+{
+ { kill ${phc2sys_pid} && wait ${phc2sys_pid}; } 2> /dev/null
+ rm "${phc2sys_log}" 2> /dev/null
+}
+
+ptp4l_start()
+{
+ local if_name=$1
+ local slave_only=$2
+ local uds_address=$3
+ local log="ptp4l_log_${if_name}"
+ local pid="ptp4l_pid_${if_name}"
+ local extra_args=""
+
+ if [ "${slave_only}" = true ]; then
+ extra_args="${extra_args} -s"
+ fi
+
+ # declare dynamic variables ptp4l_log_${if_name} and ptp4l_pid_${if_name}
+ # as global, so that they can be referenced later
+ declare -g "${log}=$(mktemp)"
+
+ chrt -f 10 ptp4l -m -2 -P \
+ -i ${if_name} \
+ --step_threshold 0.00002 \
+ --first_step_threshold 0.00002 \
+ --tx_timestamp_timeout 100 \
+ --uds_address="${uds_address}" \
+ ${extra_args} \
+ > "${!log}" 2>&1 &
+ declare -g "${pid}=$!"
+
+ echo "ptp4l for interface ${if_name} logs to ${!log} and has pid ${!pid}"
+
+ sleep 1
+}
+
+ptp4l_stop()
+{
+ local if_name=$1
+ local log="ptp4l_log_${if_name}"
+ local pid="ptp4l_pid_${if_name}"
+
+ { kill ${!pid} && wait ${!pid}; } 2> /dev/null
+ rm "${!log}" 2> /dev/null
+}
+
+cpufreq_max()
+{
+ local cpu=$1
+ local freq="cpu${cpu}_freq"
+ local governor="cpu${cpu}_governor"
+
+ # Kernel may be compiled with CONFIG_CPU_FREQ disabled
+ if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+ return
+ fi
+
+ # declare dynamic variables cpu${cpu}_freq and cpu${cpu}_governor as
+ # global, so they can be referenced later
+ declare -g "${freq}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq)"
+ declare -g "${governor}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor)"
+
+ cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_max_freq > \
+ /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+ echo -n "performance" > \
+ /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+cpufreq_restore()
+{
+ local cpu=$1
+ local freq="cpu${cpu}_freq"
+ local governor="cpu${cpu}_governor"
+
+ if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+ return
+ fi
+
+ echo "${!freq}" > /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+ echo -n "${!governor}" > \
+ /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+isochron_recv_start()
+{
+ local if_name=$1
+ local uds=$2
+ local extra_args=$3
+
+ if ! [ -z "${uds}" ]; then
+ extra_args="--unix-domain-socket ${uds}"
+ fi
+
+ isochron rcv \
+ --interface ${if_name} \
+ --sched-priority 98 \
+ --sched-fifo \
+ --utc-tai-offset ${UTC_TAI_OFFSET} \
+ --quiet \
+ ${extra_args} & \
+ isochron_pid=$!
+
+ sleep 1
+}
+
+isochron_recv_stop()
+{
+ { kill ${isochron_pid} && wait ${isochron_pid}; } 2> /dev/null
+}
+
+isochron_do()
+{
+ local sender_if_name=$1; shift
+ local receiver_if_name=$1; shift
+ local sender_uds=$1; shift
+ local receiver_uds=$1; shift
+ local base_time=$1; shift
+ local cycle_time=$1; shift
+ local shift_time=$1; shift
+ local num_pkts=$1; shift
+ local vid=$1; shift
+ local priority=$1; shift
+ local dst_ip=$1; shift
+ local isochron_dat=$1; shift
+ local extra_args=""
+ local receiver_extra_args=""
+ local vrf="$(master_name_get ${sender_if_name})"
+ local use_l2="true"
+
+ if ! [ -z "${dst_ip}" ]; then
+ use_l2="false"
+ fi
+
+ if ! [ -z "${vrf}" ]; then
+ dst_ip="${dst_ip}%${vrf}"
+ fi
+
+ if ! [ -z "${vid}" ]; then
+ vid="--vid=${vid}"
+ fi
+
+ if [ -z "${receiver_uds}" ]; then
+ extra_args="${extra_args} --omit-remote-sync"
+ fi
+
+ if ! [ -z "${shift_time}" ]; then
+ extra_args="${extra_args} --shift-time=${shift_time}"
+ fi
+
+ if [ "${use_l2}" = "true" ]; then
+ extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
+ receiver_extra_args="--l2 --etype=0xdead"
+ else
+ extra_args="${extra_args} --l4 --ip-destination=${dst_ip}"
+ receiver_extra_args="--l4"
+ fi
+
+ cpufreq_max ${ISOCHRON_CPU}
+
+ isochron_recv_start "${h2}" "${receiver_uds}" "${receiver_extra_args}"
+
+ isochron send \
+ --interface ${sender_if_name} \
+ --unix-domain-socket ${sender_uds} \
+ --priority ${priority} \
+ --base-time ${base_time} \
+ --cycle-time ${cycle_time} \
+ --num-frames ${num_pkts} \
+ --frame-size 64 \
+ --txtime \
+ --utc-tai-offset ${UTC_TAI_OFFSET} \
+ --cpu-mask $((1 << ${ISOCHRON_CPU})) \
+ --sched-fifo \
+ --sched-priority 98 \
+ --client 127.0.0.1 \
+ --sync-threshold 5000 \
+ --output-file ${isochron_dat} \
+ ${extra_args} \
+ --quiet
+
+ isochron_recv_stop
+
+ cpufreq_restore ${ISOCHRON_CPU}
+}
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index d36b7da5082a..38021a0dd527 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -12,6 +12,9 @@ CONFIG_NF_TABLES=m
CONFIG_NFT_COMPAT=m
CONFIG_NETFILTER_XTABLES=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
@@ -19,3 +22,8 @@ CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_SCH_INGRESS=m
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index ff821025d309..9dd43d7d957b 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -71,6 +71,43 @@ chk_msk_remote_key_nr()
__chk_nr "grep -c remote_key" $*
}
+__chk_listen()
+{
+ local filter="$1"
+ local expected=$2
+
+ shift 2
+ msg=$*
+
+ nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN)
+ printf "%-50s" "$msg"
+
+ if [ $nr != $expected ]; then
+ echo "[ fail ] expected $expected found $nr"
+ ret=$test_cnt
+ else
+ echo "[ ok ]"
+ fi
+}
+
+chk_msk_listen()
+{
+ lport=$1
+ local msg="check for listen socket"
+
+ # destination port search should always return empty list
+ __chk_listen "dport $lport" 0 "listen match for dport $lport"
+
+ # should return 'our' mptcp listen socket
+ __chk_listen "sport $lport" 1 "listen match for sport $lport"
+
+ __chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport"
+
+ __chk_listen "" 1 "all listen sockets"
+
+ nr=$(ss -Ml $filter | wc -l)
+}
+
# $1: ns, $2: port
wait_local_port_listen()
{
@@ -113,6 +150,7 @@ echo "a" | \
0.0.0.0 >/dev/null &
wait_local_port_listen $ns 10000
chk_msk_nr 0 "no msk on netns creation"
+chk_msk_listen 10000
echo "b" | \
timeout ${timeout_test} \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 7314257d248a..d1de1e7702fb 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -70,6 +70,7 @@ init_partial()
ip netns add $netns || exit $ksft_skip
ip -net $netns link set lo up
ip netns exec $netns sysctl -q net.mptcp.enabled=1
+ ip netns exec $netns sysctl -q net.mptcp.pm_type=0
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
if [ $checksum -eq 1 ]; then
@@ -266,6 +267,58 @@ reset_with_allow_join_id0()
ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
}
+# Modify TCP payload without corrupting the TCP packet
+#
+# This rule inverts a 8-bit word at byte offset 148 for the 2nd TCP ACK packets
+# carrying enough data.
+# Once it is done, the TCP Checksum field is updated so the packet is still
+# considered as valid at the TCP level.
+# Because the MPTCP checksum, covering the TCP options and data, has not been
+# updated, the modification will be detected and an MP_FAIL will be emitted:
+# what we want to validate here without corrupting "random" MPTCP options.
+#
+# To avoid having tc producing this pr_info() message for each TCP ACK packets
+# not carrying enough data:
+#
+# tc action pedit offset 162 out of bounds
+#
+# Netfilter is used to mark packets with enough data.
+reset_with_fail()
+{
+ reset "${1}" || return 1
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
+
+ check_invert=1
+ validate_checksum=1
+ local i="$2"
+ local ip="${3:-4}"
+ local tables
+
+ tables="iptables"
+ if [ $ip -eq 6 ]; then
+ tables="ip6tables"
+ fi
+
+ ip netns exec $ns2 $tables \
+ -t mangle \
+ -A OUTPUT \
+ -o ns2eth$i \
+ -p tcp \
+ -m length --length 150:9999 \
+ -m statistic --mode nth --packet 1 --every 99999 \
+ -j MARK --set-mark 42 || exit 1
+
+ tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1
+ tc -n $ns2 filter add dev ns2eth$i egress \
+ protocol ip prio 1000 \
+ handle 42 fw \
+ action pedit munge offset 148 u8 invert \
+ pipe csum tcp \
+ index 100 || exit 1
+}
+
fail_test()
{
ret=1
@@ -961,6 +1014,7 @@ chk_csum_nr()
local csum_ns2=${2:-0}
local count
local dump_stats
+ local extra_msg=""
local allow_multi_errors_ns1=0
local allow_multi_errors_ns2=0
@@ -976,6 +1030,9 @@ chk_csum_nr()
printf "%-${nr_blank}s %s" " " "sum"
count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
[ -z "$count" ] && count=0
+ if [ "$count" != "$csum_ns1" ]; then
+ extra_msg="$extra_msg ns1=$count"
+ fi
if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
{ [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
echo "[fail] got $count data checksum error[s] expected $csum_ns1"
@@ -987,28 +1044,58 @@ chk_csum_nr()
echo -n " - csum "
count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
[ -z "$count" ] && count=0
+ if [ "$count" != "$csum_ns2" ]; then
+ extra_msg="$extra_msg ns2=$count"
+ fi
if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
{ [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
echo "[fail] got $count data checksum error[s] expected $csum_ns2"
fail_test
dump_stats=1
else
- echo "[ ok ]"
+ echo -n "[ ok ]"
fi
[ "${dump_stats}" = 1 ] && dump_stats
+
+ echo "$extra_msg"
}
chk_fail_nr()
{
local fail_tx=$1
local fail_rx=$2
+ local ns_invert=${3:-""}
local count
local dump_stats
+ local ns_tx=$ns1
+ local ns_rx=$ns2
+ local extra_msg=""
+ local allow_tx_lost=0
+ local allow_rx_lost=0
+
+ if [[ $ns_invert = "invert" ]]; then
+ ns_tx=$ns2
+ ns_rx=$ns1
+ extra_msg=" invert"
+ fi
+
+ if [[ "${fail_tx}" = "-"* ]]; then
+ allow_tx_lost=1
+ fail_tx=${fail_tx:1}
+ fi
+ if [[ "${fail_rx}" = "-"* ]]; then
+ allow_rx_lost=1
+ fail_rx=${fail_rx:1}
+ fi
printf "%-${nr_blank}s %s" " " "ftx"
- count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
+ count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ "$count" != "$fail_tx" ]; then
+ extra_msg="$extra_msg,tx=$count"
+ fi
+ if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
+ { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
fail_test
dump_stats=1
@@ -1017,17 +1104,23 @@ chk_fail_nr()
fi
echo -n " - failrx"
- count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
+ count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
[ -z "$count" ] && count=0
if [ "$count" != "$fail_rx" ]; then
+ extra_msg="$extra_msg,rx=$count"
+ fi
+ if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
+ { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
fail_test
dump_stats=1
else
- echo "[ ok ]"
+ echo -n "[ ok ]"
fi
[ "${dump_stats}" = 1 ] && dump_stats
+
+ echo "$extra_msg"
}
chk_fclose_nr()
@@ -1106,6 +1199,38 @@ chk_rst_nr()
echo "$extra_msg"
}
+chk_infi_nr()
+{
+ local infi_tx=$1
+ local infi_rx=$2
+ local count
+ local dump_stats
+
+ printf "%-${nr_blank}s %s" " " "itx"
+ count=$(ip netns exec $ns2 nstat -as | grep InfiniteMapTx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$infi_tx" ]; then
+ echo "[fail] got $count infinite map[s] TX expected $infi_tx"
+ fail_test
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - infirx"
+ count=$(ip netns exec $ns1 nstat -as | grep InfiniteMapRx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$infi_rx" ]; then
+ echo "[fail] got $count infinite map[s] RX expected $infi_rx"
+ fail_test
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ [ "${dump_stats}" = 1 ] && dump_stats
+}
+
chk_join_nr()
{
local syn_nr=$1
@@ -1115,7 +1240,8 @@ chk_join_nr()
local csum_ns2=${5:-0}
local fail_nr=${6:-0}
local rst_nr=${7:-0}
- local corrupted_pkts=${8:-0}
+ local infi_nr=${8:-0}
+ local corrupted_pkts=${9:-0}
local count
local dump_stats
local with_cookie
@@ -1166,10 +1292,11 @@ chk_join_nr()
echo "[ ok ]"
fi
[ "${dump_stats}" = 1 ] && dump_stats
- if [ $checksum -eq 1 ]; then
+ if [ $validate_checksum -eq 1 ]; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
chk_rst_nr $rst_nr $rst_nr
+ chk_infi_nr $infi_nr $infi_nr
fi
}
@@ -1485,6 +1612,13 @@ wait_attempt_fail()
return 1
}
+set_userspace_pm()
+{
+ local ns=$1
+
+ ip netns exec $ns sysctl -q net.mptcp.pm_type=1
+}
+
subflows_tests()
{
if reset "no JOIN"; then
@@ -2556,6 +2690,90 @@ fastclose_tests()
fi
}
+pedit_action_pkts()
+{
+ tc -n $ns2 -j -s action show action pedit index 100 | \
+ sed 's/.*"packets":\([0-9]\+\),.*/\1/'
+}
+
+fail_tests()
+{
+ # single subflow
+ if reset_with_fail "Infinite map" 1; then
+ run_tests $ns1 $ns2 10.0.1.1 128
+ chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+ chk_fail_nr 1 -1 invert
+ fi
+}
+
+userspace_tests()
+{
+ # userspace pm type prevents add_addr
+ if reset "userspace pm type prevents add_addr"; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_add_nr 0 0
+ fi
+
+ # userspace pm type does not echo add_addr without daemon
+ if reset "userspace pm no echo w/o daemon"; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_add_nr 1 0
+ fi
+
+ # userspace pm type rejects join
+ if reset "userspace pm type rejects join"; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 0
+ fi
+
+ # userspace pm type does not send join
+ if reset "userspace pm type does not send join"; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # userspace pm type prevents mp_prio
+ if reset "userspace pm type prevents mp_prio"; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+ chk_join_nr 1 1 0
+ chk_prio_nr 0 0
+ fi
+
+ # userspace pm type prevents rm_addr
+ if reset "userspace pm type prevents rm_addr"; then
+ set_userspace_pm $ns1
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
+ chk_join_nr 0 0 0
+ chk_rm_nr 0 0
+ fi
+}
+
implicit_tests()
{
# userspace pm type prevents add_addr
@@ -2624,6 +2842,8 @@ all_tests_sorted=(
d@deny_join_id0_tests
m@fullmesh_tests
z@fastclose_tests
+ F@fail_tests
+ u@userspace_tests
I@implicit_tests
)
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index a75a68ad652e..6a2f4b981e1d 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <limits.h>
#include <sys/socket.h>
#include <sys/types.h>
@@ -21,17 +22,29 @@
#ifndef MPTCP_PM_NAME
#define MPTCP_PM_NAME "mptcp_pm"
#endif
+#ifndef MPTCP_PM_EVENTS
+#define MPTCP_PM_EVENTS "mptcp_pm_events"
+#endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
+ fprintf(stderr, "\trem id <local-id> token <token>\n");
+ fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n");
+ fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
fprintf(stderr, "\tflush\n");
fprintf(stderr, "\tdump\n");
fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+ fprintf(stderr, "\tevents\n");
+ fprintf(stderr, "\tlisten <local-ip> <local-port>\n");
exit(0);
}
@@ -83,6 +96,108 @@ static void nl_error(struct nlmsghdr *nh)
}
}
+static int capture_events(int fd, int event_group)
+{
+ u_int8_t buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024];
+ struct genlmsghdr *ghdr;
+ struct rtattr *attrs;
+ struct nlmsghdr *nh;
+ int ret = 0;
+ int res_len;
+ int msg_len;
+ fd_set rfds;
+
+ if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+ &event_group, sizeof(event_group)) < 0)
+ error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+
+ do {
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024;
+
+ ret = select(FD_SETSIZE, &rfds, NULL, NULL, NULL);
+
+ if (ret < 0)
+ error(1, ret, "error in select() on NL socket");
+
+ res_len = recv(fd, buffer, res_len, 0);
+ if (res_len < 0)
+ error(1, res_len, "error on recv() from NL socket");
+
+ nh = (struct nlmsghdr *)buffer;
+
+ for (; NLMSG_OK(nh, res_len); nh = NLMSG_NEXT(nh, res_len)) {
+ if (nh->nlmsg_type == NLMSG_ERROR)
+ error(1, NLMSG_ERROR, "received invalid NL message");
+
+ ghdr = (struct genlmsghdr *)NLMSG_DATA(nh);
+
+ if (ghdr->cmd == 0)
+ continue;
+
+ fprintf(stderr, "type:%d", ghdr->cmd);
+
+ msg_len = nh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN);
+
+ attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+ while (RTA_OK(attrs, msg_len)) {
+ if (attrs->rta_type == MPTCP_ATTR_TOKEN)
+ fprintf(stderr, ",token:%u", *(__u32 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_FAMILY)
+ fprintf(stderr, ",family:%u", *(__u16 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_LOC_ID)
+ fprintf(stderr, ",loc_id:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_REM_ID)
+ fprintf(stderr, ",rem_id:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_SADDR4) {
+ u_int32_t saddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+ fprintf(stderr, ",saddr4:%u.%u.%u.%u", saddr4 >> 24,
+ (saddr4 >> 16) & 0xFF, (saddr4 >> 8) & 0xFF,
+ (saddr4 & 0xFF));
+ } else if (attrs->rta_type == MPTCP_ATTR_SADDR6) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+ sizeof(buf)) != NULL)
+ fprintf(stderr, ",saddr6:%s", buf);
+ } else if (attrs->rta_type == MPTCP_ATTR_DADDR4) {
+ u_int32_t daddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+ fprintf(stderr, ",daddr4:%u.%u.%u.%u", daddr4 >> 24,
+ (daddr4 >> 16) & 0xFF, (daddr4 >> 8) & 0xFF,
+ (daddr4 & 0xFF));
+ } else if (attrs->rta_type == MPTCP_ATTR_DADDR6) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+ sizeof(buf)) != NULL)
+ fprintf(stderr, ",daddr6:%s", buf);
+ } else if (attrs->rta_type == MPTCP_ATTR_SPORT)
+ fprintf(stderr, ",sport:%u",
+ ntohs(*(__u16 *)RTA_DATA(attrs)));
+ else if (attrs->rta_type == MPTCP_ATTR_DPORT)
+ fprintf(stderr, ",dport:%u",
+ ntohs(*(__u16 *)RTA_DATA(attrs)));
+ else if (attrs->rta_type == MPTCP_ATTR_BACKUP)
+ fprintf(stderr, ",backup:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_ERROR)
+ fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
+ fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+
+ attrs = RTA_NEXT(attrs, msg_len);
+ }
+ }
+ fprintf(stderr, "\n");
+ } while (1);
+
+ return 0;
+}
+
/* do a netlink command and, if max > 0, fetch the reply */
static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
{
@@ -116,11 +231,18 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
return ret;
}
-static int genl_parse_getfamily(struct nlmsghdr *nlh)
+static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
+ int *events_mcast_grp)
{
struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
int len = nlh->nlmsg_len;
struct rtattr *attrs;
+ struct rtattr *grps;
+ struct rtattr *grp;
+ int got_events_grp;
+ int got_family;
+ int grps_len;
+ int grp_len;
if (nlh->nlmsg_type != GENL_ID_CTRL)
error(1, errno, "Not a controller message, len=%d type=0x%x\n",
@@ -135,9 +257,42 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+ got_family = 0;
+ got_events_grp = 0;
+
while (RTA_OK(attrs, len)) {
- if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
- return *(__u16 *)RTA_DATA(attrs);
+ if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) {
+ *pm_family = *(__u16 *)RTA_DATA(attrs);
+ got_family = 1;
+ } else if (attrs->rta_type == CTRL_ATTR_MCAST_GROUPS) {
+ grps = RTA_DATA(attrs);
+ grps_len = RTA_PAYLOAD(attrs);
+
+ while (RTA_OK(grps, grps_len)) {
+ grp = RTA_DATA(grps);
+ grp_len = RTA_PAYLOAD(grps);
+ got_events_grp = 0;
+
+ while (RTA_OK(grp, grp_len)) {
+ if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
+ *events_mcast_grp = *(__u32 *)RTA_DATA(grp);
+ else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
+ !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+ got_events_grp = 1;
+
+ grp = RTA_NEXT(grp, grp_len);
+ }
+
+ if (got_events_grp)
+ break;
+
+ grps = RTA_NEXT(grps, grps_len);
+ }
+ }
+
+ if (got_family && got_events_grp)
+ return 0;
+
attrs = RTA_NEXT(attrs, len);
}
@@ -145,7 +300,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
return -1;
}
-static int resolve_mptcp_pm_netlink(int fd)
+static int resolve_mptcp_pm_netlink(int fd, int *pm_family, int *events_mcast_grp)
{
char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
@@ -167,7 +322,421 @@ static int resolve_mptcp_pm_netlink(int fd)
off += NLMSG_ALIGN(rta->rta_len);
do_nl_req(fd, nh, off, sizeof(data));
- return genl_parse_getfamily((void *)data);
+ return genl_parse_getfamily((void *)data, pm_family, events_mcast_grp);
+}
+
+int dsf(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *addr;
+ u_int16_t family, port;
+ struct nlmsghdr *nh;
+ u_int32_t token;
+ int addr_start;
+ int off = 0;
+ int arg;
+
+ const char *params[5];
+
+ memset(params, 0, 5 * sizeof(const char *));
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_DESTROY,
+ MPTCP_PM_VER);
+
+ if (argc < 12)
+ syntax(argv);
+
+ /* Params recorded in this order:
+ * <local-ip>, <local-port>, <remote-ip>, <remote-port>, <token>
+ */
+ for (arg = 2; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "lip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local IP");
+
+ params[0] = argv[arg];
+ } else if (!strcmp(argv[arg], "lport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local port");
+
+ params[1] = argv[arg];
+ } else if (!strcmp(argv[arg], "rip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote IP");
+
+ params[2] = argv[arg];
+ } else if (!strcmp(argv[arg], "rport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote port");
+
+ params[3] = argv[arg];
+ } else if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token");
+
+ params[4] = argv[arg];
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+
+ for (arg = 0; arg < 4; arg = arg + 2) {
+ /* addr header */
+ addr_start = off;
+ addr = (void *)(data + off);
+ addr->rta_type = NLA_F_NESTED |
+ ((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+ addr->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(addr->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", params[arg]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* port */
+ port = atoi(params[arg + 1]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ addr->rta_len = off - addr_start;
+ }
+
+ /* token */
+ token = atoi(params[4]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, 0);
+
+ return 0;
+}
+
+int csf(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ const char *params[5];
+ struct nlmsghdr *nh;
+ struct rtattr *addr;
+ struct rtattr *rta;
+ u_int16_t family;
+ u_int32_t token;
+ u_int16_t port;
+ int addr_start;
+ u_int8_t id;
+ int off = 0;
+ int arg;
+
+ memset(params, 0, 5 * sizeof(const char *));
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_CREATE,
+ MPTCP_PM_VER);
+
+ if (argc < 12)
+ syntax(argv);
+
+ /* Params recorded in this order:
+ * <local-ip>, <local-id>, <remote-ip>, <remote-port>, <token>
+ */
+ for (arg = 2; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "lip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local IP");
+
+ params[0] = argv[arg];
+ } else if (!strcmp(argv[arg], "lid")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local id");
+
+ params[1] = argv[arg];
+ } else if (!strcmp(argv[arg], "rip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote ip");
+
+ params[2] = argv[arg];
+ } else if (!strcmp(argv[arg], "rport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote port");
+
+ params[3] = argv[arg];
+ } else if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token");
+
+ params[4] = argv[arg];
+ } else
+ error(1, 0, "unknown param %s", argv[arg]);
+ }
+
+ for (arg = 0; arg < 4; arg = arg + 2) {
+ /* addr header */
+ addr_start = off;
+ addr = (void *)(data + off);
+ addr->rta_type = NLA_F_NESTED |
+ ((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+ addr->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(addr->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", params[arg]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ if (arg == 2) {
+ /* port */
+ port = atoi(params[arg + 1]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ if (arg == 0) {
+ /* id */
+ id = atoi(params[arg + 1]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ addr->rta_len = off - addr_start;
+ }
+
+ /* token */
+ token = atoi(params[4]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, 0);
+
+ return 0;
+}
+
+int remove_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ u_int32_t token;
+ u_int8_t id;
+ int off = 0;
+ int arg;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_REMOVE,
+ MPTCP_PM_VER);
+
+ if (argc < 6)
+ syntax(argv);
+
+ for (arg = 2; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "id")) {
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_LOC_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token value");
+
+ token = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+int announce_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ u_int32_t flags = MPTCP_PM_ADDR_FLAG_SIGNAL;
+ u_int32_t token = UINT_MAX;
+ struct rtattr *rta, *addr;
+ u_int32_t id = UINT_MAX;
+ struct nlmsghdr *nh;
+ u_int16_t family;
+ int addr_start;
+ int off = 0;
+ int arg;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ANNOUNCE,
+ MPTCP_PM_VER);
+
+ if (argc < 7)
+ syntax(argv);
+
+ /* local-ip header */
+ addr_start = off;
+ addr = (void *)(data + off);
+ addr->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ addr->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(addr->rta_len);
+
+ /* local-ip data */
+ /* record addr type */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", argv[2]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* addr family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ for (arg = 3; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "id")) {
+ /* local-id */
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "dev")) {
+ /* for the if_index */
+ int32_t ifindex;
+
+ if (++arg >= argc)
+ error(1, 0, " missing dev name");
+
+ ifindex = if_nametoindex(argv[arg]);
+ if (!ifindex)
+ error(1, errno, "unknown device %s", argv[arg]);
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &ifindex, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "port")) {
+ /* local-port (optional) */
+ u_int16_t port;
+
+ if (++arg >= argc)
+ error(1, 0, " missing port value");
+
+ port = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "token")) {
+ /* MPTCP connection token */
+ if (++arg >= argc)
+ error(1, 0, " missing token value");
+
+ token = atoi(argv[arg]);
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+
+ /* addr flags */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ addr->rta_len = off - addr_start;
+
+ if (id == UINT_MAX || token == UINT_MAX)
+ error(1, 0, " missing mandatory inputs");
+
+ /* token */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, 0);
+
+ return 0;
}
int add_addr(int fd, int pm_family, int argc, char *argv[])
@@ -654,6 +1223,54 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[])
return 0;
}
+int add_listener(int argc, char *argv[])
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in6 *a6;
+ struct sockaddr_in *a4;
+ u_int16_t family;
+ int enable = 1;
+ int sock;
+ int err;
+
+ if (argc < 4)
+ syntax(argv);
+
+ memset(&addr, 0, sizeof(struct sockaddr_storage));
+ a4 = (struct sockaddr_in *)&addr;
+ a6 = (struct sockaddr_in6 *)&addr;
+
+ if (inet_pton(AF_INET, argv[2], &a4->sin_addr)) {
+ family = AF_INET;
+ a4->sin_family = family;
+ a4->sin_port = htons(atoi(argv[3]));
+ } else if (inet_pton(AF_INET6, argv[2], &a6->sin6_addr)) {
+ family = AF_INET6;
+ a6->sin6_family = family;
+ a6->sin6_port = htons(atoi(argv[3]));
+ } else
+ error(1, errno, "can't parse ip %s", argv[2]);
+
+ sock = socket(family, SOCK_STREAM, IPPROTO_MPTCP);
+ if (sock < 0)
+ error(1, errno, "can't create listener sock\n");
+
+ if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))) {
+ close(sock);
+ error(1, errno, "can't set SO_REUSEADDR on listener sock\n");
+ }
+
+ err = bind(sock, (struct sockaddr *)&addr,
+ ((family == AF_INET) ? sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6)));
+
+ if (err == 0 && listen(sock, 30) == 0)
+ pause();
+
+ close(sock);
+ return 0;
+}
+
int set_flags(int fd, int pm_family, int argc, char *argv[])
{
char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -773,7 +1390,9 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
int main(int argc, char *argv[])
{
- int fd, pm_family;
+ int events_mcast_grp;
+ int pm_family;
+ int fd;
if (argc < 2)
syntax(argv);
@@ -782,10 +1401,18 @@ int main(int argc, char *argv[])
if (fd == -1)
error(1, errno, "socket netlink");
- pm_family = resolve_mptcp_pm_netlink(fd);
+ resolve_mptcp_pm_netlink(fd, &pm_family, &events_mcast_grp);
if (!strcmp(argv[1], "add"))
return add_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "ann"))
+ return announce_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "rem"))
+ return remove_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "csf"))
+ return csf(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "dsf"))
+ return dsf(fd, pm_family, argc, argv);
else if (!strcmp(argv[1], "del"))
return del_addr(fd, pm_family, argc, argv);
else if (!strcmp(argv[1], "flush"))
@@ -798,6 +1425,10 @@ int main(int argc, char *argv[])
return get_set_limits(fd, pm_family, argc, argv);
else if (!strcmp(argv[1], "set"))
return set_flags(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "events"))
+ return capture_events(fd, events_mcast_grp);
+ else if (!strcmp(argv[1], "listen"))
+ return add_listener(argc, argv);
fprintf(stderr, "unknown sub-command: %s", argv[1]);
syntax(argv);
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
new file mode 100755
index 000000000000..78d0bb640b11
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -0,0 +1,779 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Cannot not run test without ip tool"
+ exit 1
+fi
+
+ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
+REMOVED=7 # MPTCP_EVENT_REMOVED
+SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
+SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+
+AF_INET=2
+AF_INET6=10
+
+evts_pid=0
+client4_pid=0
+server4_pid=0
+client6_pid=0
+server6_pid=0
+client4_token=""
+server4_token=""
+client6_token=""
+server6_token=""
+client4_port=0;
+client6_port=0;
+app4_port=50002
+new4_port=50003
+app6_port=50004
+client_addr_id=${RANDOM:0:2}
+server_addr_id=${RANDOM:0:2}
+
+sec=$(date +%s)
+rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+
+cleanup()
+{
+ echo "cleanup"
+
+ rm -rf $file
+
+ # Terminate the MPTCP connection and related processes
+ if [ $client4_pid -ne 0 ]; then
+ kill -SIGUSR1 $client4_pid > /dev/null 2>&1
+ fi
+ if [ $server4_pid -ne 0 ]; then
+ kill $server4_pid > /dev/null 2>&1
+ fi
+ if [ $client6_pid -ne 0 ]; then
+ kill -SIGUSR1 $client6_pid > /dev/null 2>&1
+ fi
+ if [ $server6_pid -ne 0 ]; then
+ kill $server6_pid > /dev/null 2>&1
+ fi
+ if [ $evts_pid -ne 0 ]; then
+ kill $evts_pid > /dev/null 2>&1
+ fi
+ local netns
+ for netns in "$ns1" "$ns2" ;do
+ ip netns del "$netns"
+ done
+}
+
+trap cleanup EXIT
+
+# Create and configure network namespaces for testing
+for i in "$ns1" "$ns2" ;do
+ ip netns add "$i" || exit 1
+ ip -net "$i" link set lo up
+ ip netns exec "$i" sysctl -q net.mptcp.enabled=1
+ ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
+done
+
+# "$ns1" ns2
+# ns1eth2 ns2eth1
+
+ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+
+# Add IPv4/v6 addresses to the namespaces
+ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
+ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
+ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ip -net "$ns1" link set ns1eth2 up
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth1
+ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
+ip -net "$ns2" link set ns2eth1 up
+
+stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2 \t\t\t[OK]\n"
+
+make_file()
+{
+ # Store a chunk of data in a file to transmit over an MPTCP connection
+ local name=$1
+ local ksize=1
+
+ dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+}
+
+make_connection()
+{
+ local file
+ file=$(mktemp)
+ make_file "$file" "client"
+
+ local is_v6=$1
+ local app_port=$app4_port
+ local connect_addr="10.0.1.1"
+ local listen_addr="0.0.0.0"
+ if [ "$is_v6" = "v6" ]
+ then
+ connect_addr="dead:beef:1::1"
+ listen_addr="::"
+ app_port=$app6_port
+ else
+ is_v6="v4"
+ fi
+
+ # Capture netlink events over the two network namespaces running
+ # the MPTCP client and server
+ local client_evts
+ client_evts=$(mktemp)
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
+ local client_evts_pid=$!
+ local server_evts
+ server_evts=$(mktemp)
+ :>"$server_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
+ local server_evts_pid=$!
+ sleep 0.5
+
+ # Run the server
+ ip netns exec "$ns1" \
+ ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
+ local server_pid=$!
+ sleep 0.5
+
+ # Run the client, transfer $file and stay connected to the server
+ # to conduct tests
+ ip netns exec "$ns2" \
+ ./mptcp_connect -s MPTCP -w 300 -m sendfile -p $app_port $connect_addr\
+ 2>&1 > /dev/null < "$file" &
+ local client_pid=$!
+ sleep 1
+
+ # Capture client/server attributes from MPTCP connection netlink events
+ kill $client_evts_pid
+
+ local client_token
+ local client_port
+ local client_serverside
+ local server_token
+ local server_serverside
+
+ client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
+ "$client_evts")
+ kill $server_evts_pid
+ server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
+ "$server_evts")
+ rm -f "$client_evts" "$server_evts" "$file"
+
+ if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
+ [ "$server_serverside" = 1 ]
+ then
+ stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t[OK]\n" $is_v6
+ else
+ exit 1
+ fi
+
+ if [ "$is_v6" = "v6" ]
+ then
+ client6_token=$client_token
+ server6_token=$server_token
+ client6_port=$client_port
+ client6_pid=$client_pid
+ server6_pid=$server_pid
+ else
+ client4_token=$client_token
+ server4_token=$server_token
+ client4_port=$client_port
+ client4_pid=$client_pid
+ server4_pid=$server_pid
+ fi
+}
+
+verify_announce_event()
+{
+ local evt=$1
+ local e_type=$2
+ local e_token=$3
+ local e_addr=$4
+ local e_id=$5
+ local e_dport=$6
+ local e_af=$7
+ local type
+ local token
+ local addr
+ local dport
+ local id
+
+ type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ if [ "$e_af" = "v6" ]
+ then
+ addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+ else
+ addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+ fi
+ dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
+ [ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] &&
+ [ "$id" = "$e_id" ]
+ then
+ stdbuf -o0 -e0 printf "[OK]\n"
+ return 0
+ fi
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+}
+
+test_announce()
+{
+ local evts
+ evts=$(mktemp)
+ # Capture events on the network namespace running the server
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+ evts_pid=$!
+ sleep 0.5
+
+ # ADD_ADDR using an invalid token should result in no action
+ local invalid_token=$(( client4_token - 1))
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token $invalid_token id\
+ $client_addr_id dev ns2eth1 > /dev/null 2>&1
+
+ local type
+ type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+ stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token \t\t"
+ if [ "$type" = "" ]
+ then
+ stdbuf -o0 -e0 printf "[OK]\n"
+ else
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+ fi
+
+ # ADD_ADDR from the client to server machine reusing the subflow port
+ :>"$evts"
+ ip netns exec "$ns2"\
+ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
+ ns2eth1 > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id
+ sleep 0.5
+ verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2" "$client_addr_id"\
+ "$client4_port"
+
+ # ADD_ADDR6 from the client to server machine reusing the subflow port
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl ann\
+ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id
+ sleep 0.5
+ verify_announce_event "$evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
+ "$client_addr_id" "$client6_port" "v6"
+
+ # ADD_ADDR from the client to server machine using a new port
+ :>"$evts"
+ client_addr_id=$((client_addr_id+1))
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+ $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id
+ sleep 0.5
+ verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
+ "$client_addr_id" "$new4_port"
+
+ kill $evts_pid
+
+ # Capture events on the network namespace running the client
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+ evts_pid=$!
+ sleep 0.5
+
+ # ADD_ADDR from the server to client machine reusing the subflow port
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id dev ns1eth2 > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id
+ sleep 0.5
+ verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+ "$server_addr_id" "$app4_port"
+
+ # ADD_ADDR6 from the server to client machine reusing the subflow port
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+ $server_addr_id dev ns1eth2 > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id
+ sleep 0.5
+ verify_announce_event "$evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
+ "$server_addr_id" "$app6_port" "v6"
+
+ # ADD_ADDR from the server to client machine using a new port
+ :>"$evts"
+ server_addr_id=$((server_addr_id+1))
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id
+ sleep 0.5
+ verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+ "$server_addr_id" "$new4_port"
+
+ kill $evts_pid
+ rm -f "$evts"
+}
+
+verify_remove_event()
+{
+ local evt=$1
+ local e_type=$2
+ local e_token=$3
+ local e_id=$4
+ local type
+ local token
+ local id
+
+ type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
+ [ "$id" = "$e_id" ]
+ then
+ stdbuf -o0 -e0 printf "[OK]\n"
+ return 0
+ fi
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+}
+
+test_remove()
+{
+ local evts
+ evts=$(mktemp)
+
+ # Capture events on the network namespace running the server
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+ evts_pid=$!
+ sleep 0.5
+
+ # RM_ADDR using an invalid token should result in no action
+ local invalid_token=$(( client4_token - 1 ))
+ ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
+ $client_addr_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token \t"\
+ $client_addr_id
+ local type
+ type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+ if [ "$type" = "" ]
+ then
+ stdbuf -o0 -e0 printf "[OK]\n"
+ else
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ fi
+
+ # RM_ADDR using an invalid addr id should result in no action
+ local invalid_id=$(( client_addr_id + 1 ))
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+ $invalid_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id \t"\
+ $invalid_id
+ type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+ if [ "$type" = "" ]
+ then
+ stdbuf -o0 -e0 printf "[OK]\n"
+ else
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ fi
+
+ # RM_ADDR from the client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+ $client_addr_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\
+ $client_addr_id
+ sleep 0.5
+ verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+ # RM_ADDR from the client to server machine
+ :>"$evts"
+ client_addr_id=$(( client_addr_id - 1 ))
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+ $client_addr_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\
+ $client_addr_id
+ sleep 0.5
+ verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+ # RM_ADDR6 from the client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
+ $client_addr_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1 \t"\
+ $client_addr_id
+ sleep 0.5
+ verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id"
+
+ kill $evts_pid
+
+ # Capture events on the network namespace running the client
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+ evts_pid=$!
+ sleep 0.5
+
+ # RM_ADDR from the server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+ $server_addr_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t"\
+ $server_addr_id
+ sleep 0.5
+ verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+ # RM_ADDR from the server to client machine
+ :>"$evts"
+ server_addr_id=$(( server_addr_id - 1 ))
+ ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+ $server_addr_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t" $server_addr_id
+ sleep 0.5
+ verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+ # RM_ADDR6 from the server to client machine
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
+ $server_addr_id > /dev/null 2>&1
+ stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2 \t" $server_addr_id
+ sleep 0.5
+ verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id"
+
+ kill $evts_pid
+ rm -f "$evts"
+}
+
+verify_subflow_events()
+{
+ local evt=$1
+ local e_type=$2
+ local e_token=$3
+ local e_family=$4
+ local e_saddr=$5
+ local e_daddr=$6
+ local e_dport=$7
+ local e_locid=$8
+ local e_remid=$9
+ shift 2
+ local e_from=$8
+ local e_to=$9
+ local type
+ local token
+ local family
+ local saddr
+ local daddr
+ local dport
+ local locid
+ local remid
+
+ if [ "$e_type" = "$SUB_ESTABLISHED" ]
+ then
+ if [ "$e_family" = "$AF_INET6" ]
+ then
+ stdbuf -o0 -e0 printf "CREATE_SUBFLOW6 %s (%s) => %s (%s) "\
+ "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ else
+ stdbuf -o0 -e0 printf "CREATE_SUBFLOW %s (%s) => %s (%s) \t"\
+ "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ fi
+ else
+ if [ "$e_family" = "$AF_INET6" ]
+ then
+ stdbuf -o0 -e0 printf "DESTROY_SUBFLOW6 %s (%s) => %s (%s) "\
+ "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ else
+ stdbuf -o0 -e0 printf "DESTROY_SUBFLOW %s (%s) => %s (%s) \t"\
+ "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+ fi
+ fi
+
+ type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ if [ "$family" = "$AF_INET6" ]
+ then
+ saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+ daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+ else
+ saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+ daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+ fi
+
+ if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
+ [ "$daddr" = "$e_daddr" ] && [ "$e_dport" = "$dport" ] &&
+ [ "$family" = "$e_family" ] && [ "$saddr" = "$e_saddr" ] &&
+ [ "$e_locid" = "$locid" ] && [ "$e_remid" = "$remid" ]
+ then
+ stdbuf -o0 -e0 printf "[OK]\n"
+ return 0
+ fi
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+}
+
+test_subflows()
+{
+ local evts
+ evts=$(mktemp)
+ # Capture events on the network namespace running the server
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+ evts_pid=$!
+ sleep 0.5
+
+ # Attempt to add a listener at 10.0.2.2:<subflow-port>
+ ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+ "$client4_port" > /dev/null 2>&1 &
+ local listener_pid=$!
+
+ # ADD_ADDR from client to server machine reusing the subflow port
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+ $client_addr_id > /dev/null 2>&1
+ sleep 0.5
+
+ # CREATE_SUBFLOW from server to client machine
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+ rport "$client4_port" token "$server4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET" "10.0.2.1"\
+ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+ # Delete the listener from the client ns, if one was created
+ kill $listener_pid > /dev/null 2>&1
+
+ local sport
+ sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+ # DESTROY_SUBFLOW from server to client machine
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+ "$client4_port" token "$server4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+ # RM_ADDR from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+ "$client4_token" > /dev/null 2>&1
+ sleep 0.5
+
+ # Attempt to add a listener at dead:beef:2::2:<subflow-port>
+ ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\
+ "$client6_port" > /dev/null 2>&1 &
+ listener_pid=$!
+
+ # ADD_ADDR6 from client to server machine reusing the subflow port
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\
+ $client_addr_id > /dev/null 2>&1
+ sleep 0.5
+
+ # CREATE_SUBFLOW6 from server to client machine
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\
+ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\
+ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+ "$client_addr_id" "ns1" "ns2"
+
+ # Delete the listener from the client ns, if one was created
+ kill $listener_pid > /dev/null 2>&1
+
+ sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+ # DESTROY_SUBFLOW6 from server to client machine
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\
+ dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\
+ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+ "$client_addr_id" "ns1" "ns2"
+
+ # RM_ADDR from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+ "$client6_token" > /dev/null 2>&1
+ sleep 0.5
+
+ # Attempt to add a listener at 10.0.2.2:<new-port>
+ ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+ $new4_port > /dev/null 2>&1 &
+ listener_pid=$!
+
+ # ADD_ADDR from client to server machine using a new port
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+ $client_addr_id port $new4_port > /dev/null 2>&1
+ sleep 0.5
+
+ # CREATE_SUBFLOW from server to client machine
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\
+ $new4_port token "$server4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\
+ "10.0.2.1" "10.0.2.2" "$new4_port" "23"\
+ "$client_addr_id" "ns1" "ns2"
+
+ # Delete the listener from the client ns, if one was created
+ kill $listener_pid > /dev/null 2>&1
+
+ sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+ # DESTROY_SUBFLOW from server to client machine
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+ $new4_port token "$server4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+ "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+ # RM_ADDR from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+ "$client4_token" > /dev/null 2>&1
+
+ kill $evts_pid
+
+ # Capture events on the network namespace running the client
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+ evts_pid=$!
+ sleep 0.5
+
+ # Attempt to add a listener at 10.0.2.1:<subflow-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+ $app4_port > /dev/null 2>&1 &
+ listener_pid=$!
+
+ # ADD_ADDR from server to client machine reusing the subflow port
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id > /dev/null 2>&1
+ sleep 0.5
+
+ # CREATE_SUBFLOW from client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+ $app4_port token "$client4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET" "10.0.2.2"\
+ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ kill $listener_pid> /dev/null 2>&1
+
+ sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+ # DESTROY_SUBFLOW from client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+ $app4_port token "$client4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server4_token" > /dev/null 2>&1
+ sleep 0.5
+
+ # Attempt to add a listener at dead:beef:2::1:<subflow-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\
+ $app6_port > /dev/null 2>&1 &
+ listener_pid=$!
+
+ # ADD_ADDR6 from server to client machine reusing the subflow port
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+ $server_addr_id > /dev/null 2>&1
+ sleep 0.5
+
+ # CREATE_SUBFLOW6 from client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\
+ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client6_token"\
+ "$AF_INET6" "dead:beef:2::2"\
+ "dead:beef:2::1" "$app6_port" "23"\
+ "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ kill $listener_pid > /dev/null 2>&1
+
+ sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+ # DESTROY_SUBFLOW6 from client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\
+ dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_CLOSED" "$client6_token" "$AF_INET6" "dead:beef:2::2"\
+ "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR6 from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server6_token" > /dev/null 2>&1
+ sleep 0.5
+
+ # Attempt to add a listener at 10.0.2.1:<new-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+ $new4_port > /dev/null 2>&1 &
+ listener_pid=$!
+
+ # ADD_ADDR from server to client machine using a new port
+ :>"$evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id port $new4_port > /dev/null 2>&1
+ sleep 0.5
+
+ # CREATE_SUBFLOW from client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+ $new4_port token "$client4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\
+ "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ kill $listener_pid > /dev/null 2>&1
+
+ sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+ # DESTROY_SUBFLOW from client to server machine
+ :>"$evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+ $new4_port token "$client4_token" > /dev/null 2>&1
+ sleep 0.5
+ verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+ "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server4_token" > /dev/null 2>&1
+
+ kill $evts_pid
+ rm -f "$evts"
+}
+
+make_connection
+make_connection "v6"
+test_announce
+test_remove
+test_subflows
+
+exit 0
diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
new file mode 100755
index 000000000000..f508657ee126
--- /dev/null
+++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for the accept_unsolicited_na feature to
+# enable RFC9131 behaviour. The following is the test-matrix.
+# drop accept fwding behaviour
+# ---- ------ ------ ----------------------------------------------
+# 1 X X Drop NA packet and don't pass up the stack
+# 0 0 X Pass NA packet up the stack, don't update NC
+# 0 1 0 Pass NA packet up the stack, don't update NC
+# 0 1 1 Pass NA packet up the stack, and add a STALE
+# NC entry
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+HOST_NS="ns-host"
+ROUTER_NS="ns-router"
+
+HOST_INTF="veth-host"
+ROUTER_INTF="veth-router"
+
+ROUTER_ADDR="2000:20::1"
+HOST_ADDR="2000:20::2"
+SUBNET_WIDTH=64
+ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}"
+HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}"
+
+IP_HOST="ip -6 -netns ${HOST_NS}"
+IP_HOST_EXEC="ip netns exec ${HOST_NS}"
+IP_ROUTER="ip -6 -netns ${ROUTER_NS}"
+IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}"
+
+tcpdump_stdout=
+tcpdump_stderr=
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+setup()
+{
+ set -e
+
+ local drop_unsolicited_na=$1
+ local accept_unsolicited_na=$2
+ local forwarding=$3
+
+ # Setup two namespaces and a veth tunnel across them.
+ # On end of the tunnel is a router and the other end is a host.
+ ip netns add ${HOST_NS}
+ ip netns add ${ROUTER_NS}
+ ${IP_ROUTER} link add ${ROUTER_INTF} type veth \
+ peer name ${HOST_INTF} netns ${HOST_NS}
+
+ # Enable IPv6 on both router and host, and configure static addresses.
+ # The router here is the DUT
+ # Setup router configuration as specified by the arguments.
+ # forwarding=0 case is to check that a non-router
+ # doesn't add neighbour entries.
+ ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+ ${IP_ROUTER_EXEC} sysctl -qw \
+ ${ROUTER_CONF}.forwarding=${forwarding}
+ ${IP_ROUTER_EXEC} sysctl -qw \
+ ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na}
+ ${IP_ROUTER_EXEC} sysctl -qw \
+ ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na}
+ ${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0
+ ${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF}
+
+ # Turn on ndisc_notify on host interface so that
+ # the host sends unsolicited NAs.
+ HOST_CONF=net.ipv6.conf.${HOST_INTF}
+ ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+ ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+ ${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF}
+
+ set +e
+}
+
+start_tcpdump() {
+ set -e
+ tcpdump_stdout=`mktemp`
+ tcpdump_stderr=`mktemp`
+ ${IP_ROUTER_EXEC} timeout 15s \
+ tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+ "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \
+ > ${tcpdump_stdout} 2> /dev/null
+ set +e
+}
+
+cleanup_tcpdump()
+{
+ set -e
+ [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
+ [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
+ tcpdump_stdout=
+ tcpdump_stderr=
+ set +e
+}
+
+cleanup()
+{
+ cleanup_tcpdump
+ ip netns del ${HOST_NS}
+ ip netns del ${ROUTER_NS}
+}
+
+link_up() {
+ set -e
+ ${IP_ROUTER} link set dev ${ROUTER_INTF} up
+ ${IP_HOST} link set dev ${HOST_INTF} up
+ set +e
+}
+
+verify_ndisc() {
+ local drop_unsolicited_na=$1
+ local accept_unsolicited_na=$2
+ local forwarding=$3
+
+ neigh_show_output=$(${IP_ROUTER} neigh show \
+ to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale)
+ if [ ${drop_unsolicited_na} -eq 0 ] && \
+ [ ${accept_unsolicited_na} -eq 1 ] && \
+ [ ${forwarding} -eq 1 ]; then
+ # Neighbour entry expected to be present for 011 case
+ [[ ${neigh_show_output} ]]
+ else
+ # Neighbour entry expected to be absent for all other cases
+ [[ -z ${neigh_show_output} ]]
+ fi
+}
+
+test_unsolicited_na_common()
+{
+ # Setup the test bed, but keep links down
+ setup $1 $2 $3
+
+ # Bring the link up, wait for the NA,
+ # and add a delay to ensure neighbour processing is done.
+ link_up
+ start_tcpdump
+
+ # Verify the neighbour table
+ verify_ndisc $1 $2 $3
+
+}
+
+test_unsolicited_na_combination() {
+ test_unsolicited_na_common $1 $2 $3
+ test_msg=("test_unsolicited_na: "
+ "drop_unsolicited_na=$1 "
+ "accept_unsolicited_na=$2 "
+ "forwarding=$3")
+ log_test $? 0 "${test_msg[*]}"
+ cleanup
+}
+
+test_unsolicited_na_combinations() {
+ # Args: drop_unsolicited_na accept_unsolicited_na forwarding
+
+ # Expect entry
+ test_unsolicited_na_combination 0 1 1
+
+ # Expect no entry
+ test_unsolicited_na_combination 0 0 0
+ test_unsolicited_na_combination 0 0 1
+ test_unsolicited_na_combination 0 1 0
+ test_unsolicited_na_combination 1 0 0
+ test_unsolicited_na_combination 1 0 1
+ test_unsolicited_na_combination 1 1 0
+ test_unsolicited_na_combination 1 1 1
+}
+
+###############################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+ -p Pause on fail
+ -P Pause after each test before cleanup
+EOF
+}
+
+###############################################################################
+# main
+
+while getopts :pPh o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+test_unsolicited_na_combinations
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 865d53c1781c..417d214264f3 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -14,6 +14,8 @@ INIT_NETNS_NAME="init"
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+TESTS="init testns mix"
+
log_test()
{
local rc=$1
@@ -262,6 +264,8 @@ cleanup()
vrf_strict_mode_tests_init()
{
+ log_section "VRF strict_mode test on init network namespace"
+
vrf_strict_mode_check_support init
strict_mode_check_default init
@@ -292,6 +296,8 @@ vrf_strict_mode_tests_init()
vrf_strict_mode_tests_testns()
{
+ log_section "VRF strict_mode test on testns network namespace"
+
vrf_strict_mode_check_support testns
strict_mode_check_default testns
@@ -318,6 +324,8 @@ vrf_strict_mode_tests_testns()
vrf_strict_mode_tests_mix()
{
+ log_section "VRF strict_mode test mixing init and testns network namespaces"
+
read_strict_mode_compare_and_check init 1
read_strict_mode_compare_and_check testns 0
@@ -341,18 +349,30 @@ vrf_strict_mode_tests_mix()
read_strict_mode_compare_and_check testns 0
}
-vrf_strict_mode_tests()
-{
- log_section "VRF strict_mode test on init network namespace"
- vrf_strict_mode_tests_init
+################################################################################
+# usage
- log_section "VRF strict_mode test on testns network namespace"
- vrf_strict_mode_tests_testns
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
- log_section "VRF strict_mode test mixing init and testns network namespaces"
- vrf_strict_mode_tests_mix
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
}
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
vrf_strict_mode_check_support()
{
local nsname=$1
@@ -391,7 +411,17 @@ fi
cleanup &> /dev/null
setup
-vrf_strict_mode_tests
+for t in $TESTS
+do
+ case $t in
+ vrf_strict_mode_tests_init|init) vrf_strict_mode_tests_init;;
+ vrf_strict_mode_tests_testns|testns) vrf_strict_mode_tests_testns;;
+ vrf_strict_mode_tests_mix|mix) vrf_strict_mode_tests_mix;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+
+ esac
+done
cleanup
print_log_test_results
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
index 695a1958723f..fd76b69635a4 100755
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -66,6 +66,20 @@ table inet filter {
EOF
}
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
load_ruleset_count() {
local netns=$1
@@ -219,4 +233,40 @@ sleep 2
ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+ ip -net ${nsrouter} nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
exit 0
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 19515dcb7d04..f50778a3d744 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -40,6 +40,7 @@ ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
+ALL_TESTS="$ALL_TESTS 0008:1:1:match_int"
function allow_user_defaults()
{
@@ -785,6 +786,27 @@ sysctl_test_0007()
return $ksft_skip
}
+sysctl_test_0008()
+{
+ TARGET="${SYSCTL}/match_int"
+ if [ ! -f $TARGET ]; then
+ echo "Skipping test for $TARGET as it is not present ..."
+ return $ksft_skip
+ fi
+
+ echo -n "Testing if $TARGET is matched in kernel"
+ ORIG_VALUE=$(cat "${TARGET}")
+
+ if [ $ORIG_VALUE -ne 1 ]; then
+ echo "TEST FAILED"
+ rc=1
+ test_rc
+ fi
+
+ echo "ok"
+ return 0
+}
+
list_tests()
{
echo "Test ID list:"
@@ -800,6 +822,7 @@ list_tests()
echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
+ echo "0008 x $(get_test_count 0008) - tests sysctl macro values match"
}
usage()