198 files changed, 20067 insertions, 1955 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 2dbc1413fabd..6694a0fc8f99 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -19,10 +19,11 @@ SYNOPSIS
 BTF COMMANDS
 =============
 
-|	**bpftool** **btf dump** *BTF_SRC*
+|	**bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
 |	**bpftool** **btf help**
 |
 |	*BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
+|	*FORMAT* := { **raw** | **c** }
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 
@@ -31,23 +32,27 @@ DESCRIPTION
 	**bpftool btf dump** *BTF_SRC*
 		  Dump BTF entries from a given *BTF_SRC*.
 
-                  When **id** is specified, BTF object with that ID will be
-                  loaded and all its BTF types emitted.
+		  When **id** is specified, BTF object with that ID will be
+		  loaded and all its BTF types emitted.
 
-                  When **map** is provided, it's expected that map has
-                  associated BTF object with BTF types describing key and
-                  value. It's possible to select whether to dump only BTF
-                  type(s) associated with key (**key**), value (**value**),
-                  both key and value (**kv**), or all BTF types present in
-                  associated BTF object (**all**). If not specified, **kv**
-                  is assumed.
+		  When **map** is provided, it's expected that map has
+		  associated BTF object with BTF types describing key and
+		  value. It's possible to select whether to dump only BTF
+		  type(s) associated with key (**key**), value (**value**),
+		  both key and value (**kv**), or all BTF types present in
+		  associated BTF object (**all**). If not specified, **kv**
+		  is assumed.
 
-                  When **prog** is provided, it's expected that program has
-                  associated BTF object with BTF types.
+		  When **prog** is provided, it's expected that program has
+		  associated BTF object with BTF types.
 
-                  When specifying *FILE*, an ELF file is expected, containing
-                  .BTF section with well-defined BTF binary format data,
-                  typically produced by clang or pahole.
+		  When specifying *FILE*, an ELF file is expected, containing
+		  .BTF section with well-defined BTF binary format data,
+		  typically produced by clang or pahole.
+
+		  **format** option can be used to override default (raw)
+		  output format. Raw (**raw**) or C-syntax (**c**) output
+		  formats are supported.
 
 	**bpftool btf help**
 		  Print short help message.
@@ -67,6 +72,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 **# bpftool btf dump id 1226**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index e744b3e4e56a..585f270c2d25 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -29,7 +29,8 @@ CGROUP COMMANDS
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 |	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
 |		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-|		**sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** }
+|		**sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** |
+|		**getsockopt** | **setsockopt** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -90,7 +91,9 @@ DESCRIPTION
                   an unconnected udp4 socket (since 5.2);
 		  **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
                   an unconnected udp6 socket (since 5.2);
-		  **sysctl** sysctl access (since 5.2).
+		  **sysctl** sysctl access (since 5.2);
+		  **getsockopt** call to getsockopt (since 5.3);
+		  **setsockopt** call to setsockopt (since 5.3).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
@@ -117,6 +120,10 @@ OPTIONS
 	-f, --bpffs
 		  Show file names of pinned programs.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 |
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 14180e887082..4d08f35034a2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -73,6 +73,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 SEE ALSO
 ========
 	**bpf**\ (2),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 13ef27b39f20..490b4501cb6e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -152,6 +152,10 @@ OPTIONS
 		  Do not automatically attempt to mount any virtual file system
 		  (such as tracefs or BPF virtual file system) when necessary.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 **# bpftool map show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 934580850f42..d8e5237a2085 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -65,6 +65,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index 0c7576523a21..e252bd0bc434 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -53,6 +53,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 018ecef8dc13..7a374b3c851d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -29,6 +29,7 @@ PROG COMMANDS
 |	**bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog tracelog**
+|	**bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
 |	**bpftool** **prog help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -40,7 +41,8 @@ PROG COMMANDS
 |		**lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
-|		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl**
+|		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
+|		**cgroup/getsockopt** | **cgroup/setsockopt**
 |	}
 |       *ATTACH_TYPE* := {
 |		**msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
@@ -145,6 +147,39 @@ DESCRIPTION
 		  streaming data from BPF programs to user space, one can use
 		  perf events (see also **bpftool-map**\ (8)).
 
+	**bpftool prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
+		  Run BPF program *PROG* in the kernel testing infrastructure
+		  for BPF, meaning that the program works on the data and
+		  context provided by the user, and not on actual packets or
+		  monitored functions etc. Return value and duration for the
+		  test run are printed out to the console.
+
+		  Input data is read from the *FILE* passed with **data_in**.
+		  If this *FILE* is "**-**", input data is read from standard
+		  input. Input context, if any, is read from *FILE* passed with
+		  **ctx_in**. Again, "**-**" can be used to read from standard
+		  input, but only if standard input is not already in use for
+		  input data. If a *FILE* is passed with **data_out**, output
+		  data is written to that file. Similarly, output context is
+		  written to the *FILE* passed with **ctx_out**. For both
+		  output flows, "**-**" can be used to print to the standard
+		  output (as plain text, or JSON if relevant option was
+		  passed). If output keywords are omitted, output data and
+		  context are discarded. Keywords **data_size_out** and
+		  **ctx_size_out** are used to pass the size (in bytes) for the
+		  output buffers to the kernel, although the default of 32 kB
+		  should be more than enough for most cases.
+
+		  Keyword **repeat** is used to indicate the number of
+		  consecutive runs to perform. Note that output data and
+		  context printed to files correspond to the last of those
+		  runs. The duration printed out at the end of the runs is an
+		  average over all runs performed by the command.
+
+		  Not all program types support test run. Among those which do,
+		  not all of them can take the **ctx_in**/**ctx_out**
+		  arguments. bpftool does not perform checks on program types.
+
 	**bpftool prog help**
 		  Print short help message.
 
@@ -174,6 +209,11 @@ OPTIONS
 		  Do not automatically attempt to mount any virtual file system
 		  (such as tracefs or BPF virtual file system) when necessary.
 
+	-d, --debug
+		  Print all logs available, even debug-level information. This
+		  includes logs from libbpf as well as from the verifier, when
+		  attempting to load programs.
+
 EXAMPLES
 ========
 **# bpftool prog show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 3e562d7fd56f..6a9c52ef84a9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -66,6 +66,10 @@ OPTIONS
 		  Do not automatically attempt to mount any virtual file system
 		  (such as tracefs or BPF virtual file system) when necessary.
 
+	-d, --debug
+		  Print all logs available, even debug-level information. This
+		  includes logs from libbpf as well as from the verifier, when
+		  attempting to load programs.
 
 SEE ALSO
 ========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 4300adf6e5ab..c8f42e1fcbc9 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -71,6 +71,12 @@ _bpftool_get_prog_tags()
         command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
 }
 
+_bpftool_get_btf_ids()
+{
+    COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
+        command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) )
+}
+
 _bpftool_get_obj_map_names()
 {
     local obj
@@ -181,7 +187,7 @@ _bpftool()
 
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
-        local c='--version --json --pretty --bpffs --mapcompat'
+        local c='--version --json --pretty --bpffs --mapcompat --debug'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
@@ -336,6 +342,13 @@ _bpftool()
                 load|loadall)
                     local obj
 
+                    # Propose "load/loadall" to complete "bpftool prog load",
+                    # or bash tries to complete "load" as a filename below.
+                    if [[ ${#words[@]} -eq 3 ]]; then
+                        COMPREPLY=( $( compgen -W "load loadall" -- "$cur" ) )
+                        return 0
+                    fi
+
                     if [[ ${#words[@]} -lt 6 ]]; then
                         _filedir
                         return 0
@@ -373,7 +386,8 @@ _bpftool()
                                 cgroup/sendmsg4 cgroup/sendmsg6 \
                                 cgroup/recvmsg4 cgroup/recvmsg6 \
                                 cgroup/post_bind4 cgroup/post_bind6 \
-                                cgroup/sysctl" -- \
+                                cgroup/sysctl cgroup/getsockopt \
+                                cgroup/setsockopt" -- \
                                                    "$cur" ) )
                             return 0
                             ;;
@@ -401,10 +415,34 @@ _bpftool()
                 tracelog)
                     return 0
                     ;;
+                run)
+                    if [[ ${#words[@]} -lt 5 ]]; then
+                        _filedir
+                        return 0
+                    fi
+                    case $prev in
+                        id)
+                            _bpftool_get_prog_ids
+                            return 0
+                            ;;
+                        data_in|data_out|ctx_in|ctx_out)
+                            _filedir
+                            return 0
+                            ;;
+                        repeat|data_size_out|ctx_size_out)
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'data_in data_out data_size_out \
+                                ctx_in ctx_out ctx_size_out repeat'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help pin attach detach load \
-                            show list tracelog' -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'dump help pin attach detach \
+                            load loadall show list tracelog run' -- "$cur" ) )
                     ;;
             esac
             ;;
@@ -636,14 +674,30 @@ _bpftool()
                                 map)
                                     _bpftool_get_map_ids
                                     ;;
+                                dump)
+                                    _bpftool_get_btf_ids
+                                    ;;
                             esac
                             return 0
                             ;;
+                        format)
+                            COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
+                            ;;
                         *)
-                            if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then
-                                 COMPREPLY+=( $( compgen -W 'key value kv all' -- \
-                                     "$cur" ) )
-                            fi
+                            # emit extra options
+                            case ${words[3]} in
+                                id|file)
+                                    _bpftool_once_attr 'format'
+                                    ;;
+                                map|prog)
+                                    if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then
+                                        COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) )
+                                    fi
+                                    _bpftool_once_attr 'format'
+                                    ;;
+                                *)
+                                    ;;
+                            esac
                             return 0
                             ;;
                     esac
@@ -667,7 +721,8 @@ _bpftool()
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
                         device bind4 bind6 post_bind4 post_bind6 connect4 \
-                        connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl'
+                        connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
+                        getsockopt setsockopt'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag'
                     case $prev in
@@ -677,7 +732,8 @@ _bpftool()
                             ;;
                         ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
                         post_bind4|post_bind6|connect4|connect6|sendmsg4|\
-                        sendmsg6|recvmsg4|recvmsg6|sysctl)
+                        sendmsg6|recvmsg4|recvmsg6|sysctl|getsockopt|\
+                        setsockopt)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 7317438ecd9e..1b8ec91899e6 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -8,8 +8,8 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
-#include <gelf.h>
 #include <bpf.h>
+#include <libbpf.h>
 #include <linux/btf.h>
 
 #include "btf.h"
@@ -340,109 +340,40 @@ static int dump_btf_raw(const struct btf *btf,
 	return 0;
 }
 
-static bool check_btf_endianness(GElf_Ehdr *ehdr)
+static void __printf(2, 0) btf_dump_printf(void *ctx,
+					   const char *fmt, va_list args)
 {
-	static unsigned int const endian = 1;
-
-	switch (ehdr->e_ident[EI_DATA]) {
-	case ELFDATA2LSB:
-		return *(unsigned char const *)&endian == 1;
-	case ELFDATA2MSB:
-		return *(unsigned char const *)&endian == 0;
-	default:
-		return 0;
-	}
+	vfprintf(stdout, fmt, args);
 }
 
-static int btf_load_from_elf(const char *path, struct btf **btf)
+static int dump_btf_c(const struct btf *btf,
+		      __u32 *root_type_ids, int root_type_cnt)
 {
-	int err = -1, fd = -1, idx = 0;
-	Elf_Data *btf_data = NULL;
-	Elf_Scn *scn = NULL;
-	Elf *elf = NULL;
-	GElf_Ehdr ehdr;
-
-	if (elf_version(EV_CURRENT) == EV_NONE) {
-		p_err("failed to init libelf for %s", path);
-		return -1;
-	}
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0) {
-		p_err("failed to open %s: %s", path, strerror(errno));
-		return -1;
-	}
-
-	elf = elf_begin(fd, ELF_C_READ, NULL);
-	if (!elf) {
-		p_err("failed to open %s as ELF file", path);
-		goto done;
-	}
-	if (!gelf_getehdr(elf, &ehdr)) {
-		p_err("failed to get EHDR from %s", path);
-		goto done;
-	}
-	if (!check_btf_endianness(&ehdr)) {
-		p_err("non-native ELF endianness is not supported");
-		goto done;
-	}
-	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
-		p_err("failed to get e_shstrndx from %s\n", path);
-		goto done;
-	}
+	struct btf_dump *d;
+	int err = 0, i;
 
-	while ((scn = elf_nextscn(elf, scn)) != NULL) {
-		GElf_Shdr sh;
-		char *name;
+	d = btf_dump__new(btf, NULL, NULL, btf_dump_printf);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
 
-		idx++;
-		if (gelf_getshdr(scn, &sh) != &sh) {
-			p_err("failed to get section(%d) header from %s",
-			      idx, path);
-			goto done;
-		}
-		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
-		if (!name) {
-			p_err("failed to get section(%d) name from %s",
-			      idx, path);
-			goto done;
-		}
-		if (strcmp(name, BTF_ELF_SEC) == 0) {
-			btf_data = elf_getdata(scn, 0);
-			if (!btf_data) {
-				p_err("failed to get section(%d, %s) data from %s",
-				      idx, name, path);
+	if (root_type_cnt) {
+		for (i = 0; i < root_type_cnt; i++) {
+			err = btf_dump__dump_type(d, root_type_ids[i]);
+			if (err)
 				goto done;
-			}
-			break;
 		}
-	}
-
-	if (!btf_data) {
-		p_err("%s ELF section not found in %s", BTF_ELF_SEC, path);
-		goto done;
-	}
+	} else {
+		int cnt = btf__get_nr_types(btf);
 
-	*btf = btf__new(btf_data->d_buf, btf_data->d_size);
-	if (IS_ERR(*btf)) {
-		err = PTR_ERR(*btf);
-		*btf = NULL;
-		p_err("failed to load BTF data from %s: %s",
-		      path, strerror(err));
-		goto done;
+		for (i = 1; i <= cnt; i++) {
+			err = btf_dump__dump_type(d, i);
+			if (err)
+				goto done;
+		}
 	}
 
-	err = 0;
 done:
-	if (err) {
-		if (*btf) {
-			btf__free(*btf);
-			*btf = NULL;
-		}
-	}
-	if (elf)
-		elf_end(elf);
-	close(fd);
+	btf_dump__free(d);
 	return err;
 }
 
@@ -451,6 +382,7 @@ static int do_dump(int argc, char **argv)
 	struct btf *btf = NULL;
 	__u32 root_type_ids[2];
 	int root_type_cnt = 0;
+	bool dump_c = false;
 	__u32 btf_id = -1;
 	const char *src;
 	int fd = -1;
@@ -522,9 +454,14 @@ static int do_dump(int argc, char **argv)
 		}
 		NEXT_ARG();
 	} else if (is_prefix(src, "file")) {
-		err = btf_load_from_elf(*argv, &btf);
-		if (err)
+		btf = btf__parse_elf(*argv, NULL);
+		if (IS_ERR(btf)) {
+			err = PTR_ERR(btf);
+			btf = NULL;
+			p_err("failed to load BTF from %s: %s", 
+			      *argv, strerror(err));
 			goto done;
+		}
 		NEXT_ARG();
 	} else {
 		err = -1;
@@ -532,6 +469,29 @@ static int do_dump(int argc, char **argv)
 		goto done;
 	}
 
+	while (argc) {
+		if (is_prefix(*argv, "format")) {
+			NEXT_ARG();
+			if (argc < 1) {
+				p_err("expecting value for 'format' option\n");
+				goto done;
+			}
+			if (strcmp(*argv, "c") == 0) {
+				dump_c = true;
+			} else if (strcmp(*argv, "raw") == 0) {
+				dump_c = false;
+			} else {
+				p_err("unrecognized format specifier: '%s', possible values: raw, c",
+				      *argv);
+				goto done;
+			}
+			NEXT_ARG();
+		} else {
+			p_err("unrecognized option: '%s'", *argv);
+			goto done;
+		}
+	}
+
 	if (!btf) {
 		err = btf__get_from_id(btf_id, &btf);
 		if (err) {
@@ -545,7 +505,16 @@ static int do_dump(int argc, char **argv)
 		}
 	}
 
-	dump_btf_raw(btf, root_type_ids, root_type_cnt);
+	if (dump_c) {
+		if (json_output) {
+			p_err("JSON output for C-syntax dump is not supported");
+			err = -ENOTSUP;
+			goto done;
+		}
+		err = dump_btf_c(btf, root_type_ids, root_type_cnt);
+	} else {
+		err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
+	}
 
 done:
 	close(fd);
@@ -561,10 +530,11 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s btf dump BTF_SRC\n"
+		"Usage: %s btf dump BTF_SRC [format FORMAT]\n"
 		"       %s btf help\n"
 		"\n"
 		"       BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
+		"       FORMAT  := { raw | c }\n"
 		"       " HELP_SPEC_MAP "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index a13fb7265d1a..f3c05b08c68c 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -26,7 +26,8 @@
 	"                        sock_ops | device | bind4 | bind6 |\n"	       \
 	"                        post_bind4 | post_bind6 | connect4 |\n"       \
 	"                        connect6 | sendmsg4 | sendmsg6 |\n"           \
-	"                        recvmsg4 | recvmsg6 | sysctl }"
+	"                        recvmsg4 | recvmsg6 | sysctl |\n"	       \
+	"                        getsockopt | setsockopt }"
 
 static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -45,6 +46,8 @@ static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_SYSCTL] = "sysctl",
 	[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
 	[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+	[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+	[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index f7261fad45c1..5215e0870bcb 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -21,6 +21,7 @@
 #include <sys/vfs.h>
 
 #include <bpf.h>
+#include <libbpf.h> /* libbpf_num_possible_cpus */
 
 #include "main.h"
 
@@ -439,57 +440,13 @@ unsigned int get_page_size(void)
 
 unsigned int get_possible_cpus(void)
 {
-	static unsigned int result;
-	char buf[128];
-	long int n;
-	char *ptr;
-	int fd;
-
-	if (result)
-		return result;
-
-	fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
-	if (fd < 0) {
-		p_err("can't open sysfs possible cpus");
-		exit(-1);
-	}
-
-	n = read(fd, buf, sizeof(buf));
-	if (n < 2) {
-		p_err("can't read sysfs possible cpus");
-		exit(-1);
-	}
-	close(fd);
+	int cpus = libbpf_num_possible_cpus();
 
-	if (n == sizeof(buf)) {
-		p_err("read sysfs possible cpus overflow");
+	if (cpus < 0) {
+		p_err("Can't get # of possible cpus: %s", strerror(-cpus));
 		exit(-1);
 	}
-
-	ptr = buf;
-	n = 0;
-	while (*ptr && *ptr != '\n') {
-		unsigned int a, b;
-
-		if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
-			n += b - a + 1;
-
-			ptr = strchr(ptr, '-') + 1;
-		} else if (sscanf(ptr, "%u", &a) == 1) {
-			n++;
-		} else {
-			assert(0);
-		}
-
-		while (isdigit(*ptr))
-			ptr++;
-		if (*ptr == ',')
-			ptr++;
-	}
-
-	result = n;
-
-	return result;
+	return cpus;
 }
 
 static char *
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 3ef3093560ba..bfed711258ce 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -11,6 +11,8 @@
  * Licensed under the GNU General Public License, version 2.0 (GPLv2)
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -44,11 +46,13 @@ static int fprintf_json(void *out, const char *fmt, ...)
 	char *s;
 
 	va_start(ap, fmt);
+	if (vasprintf(&s, fmt, ap) < 0)
+		return -1;
+	va_end(ap);
+
 	if (!oper_count) {
 		int i;
 
-		s = va_arg(ap, char *);
-
 		/* Strip trailing spaces */
 		i = strlen(s) - 1;
 		while (s[i] == ' ')
@@ -61,11 +65,10 @@ static int fprintf_json(void *out, const char *fmt, ...)
 	} else if (!strcmp(fmt, ",")) {
 		   /* Skip */
 	} else {
-		s = va_arg(ap, char *);
 		jsonw_string(json_wtr, s);
 		oper_count++;
 	}
-	va_end(ap);
+	free(s);
 	return 0;
 }
 
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ac1fc520e6a..e916ff25697f 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -10,6 +10,7 @@
 #include <string.h>
 
 #include <bpf.h>
+#include <libbpf.h>
 
 #include "main.h"
 
@@ -25,6 +26,7 @@ bool pretty_output;
 bool json_output;
 bool show_pinned;
 bool block_mount;
+bool verifier_logs;
 int bpf_flags;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
@@ -77,6 +79,13 @@ static int do_version(int argc, char **argv)
 	return 0;
 }
 
+static int __printf(2, 0)
+print_all_levels(__maybe_unused enum libbpf_print_level level,
+		 const char *format, va_list args)
+{
+	return vfprintf(stderr, format, args);
+}
+
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
 	       int (*help)(int argc, char **argv))
 {
@@ -108,6 +117,35 @@ bool is_prefix(const char *pfx, const char *str)
 	return !memcmp(str, pfx, strlen(pfx));
 }
 
+/* Last argument MUST be NULL pointer */
+int detect_common_prefix(const char *arg, ...)
+{
+	unsigned int count = 0;
+	const char *ref;
+	char msg[256];
+	va_list ap;
+
+	snprintf(msg, sizeof(msg), "ambiguous prefix: '%s' could be '", arg);
+	va_start(ap, arg);
+	while ((ref = va_arg(ap, const char *))) {
+		if (!is_prefix(arg, ref))
+			continue;
+		count++;
+		if (count > 1)
+			strncat(msg, "' or '", sizeof(msg) - strlen(msg) - 1);
+		strncat(msg, ref, sizeof(msg) - strlen(msg) - 1);
+	}
+	va_end(ap);
+	strncat(msg, "'", sizeof(msg) - strlen(msg) - 1);
+
+	if (count >= 2) {
+		p_err(msg);
+		return -1;
+	}
+
+	return 0;
+}
+
 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
 {
 	unsigned char *data = arg;
@@ -317,6 +355,7 @@ int main(int argc, char **argv)
 		{ "bpffs",	no_argument,	NULL,	'f' },
 		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ "nomount",	no_argument,	NULL,	'n' },
+		{ "debug",	no_argument,	NULL,	'd' },
 		{ 0 }
 	};
 	int opt, ret;
@@ -332,7 +371,7 @@ int main(int argc, char **argv)
 	hash_init(map_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "Vhpjfmn",
+	while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -362,6 +401,10 @@ int main(int argc, char **argv)
 		case 'n':
 			block_mount = true;
 			break;
+		case 'd':
+			libbpf_set_print(print_all_levels);
+			verifier_logs = true;
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 3d63feb7f852..3ef0d9051e10 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
 	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
 	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
+	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
 };
 
 extern const char * const map_type_name[];
@@ -91,6 +92,7 @@ extern json_writer_t *json_wtr;
 extern bool json_output;
 extern bool show_pinned;
 extern bool block_mount;
+extern bool verifier_logs;
 extern int bpf_flags;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
@@ -99,6 +101,7 @@ void p_err(const char *fmt, ...);
 void p_info(const char *fmt, ...);
 
 bool is_prefix(const char *pfx, const char *str);
+int detect_common_prefix(const char *arg, ...);
 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
 void usage(void) __noreturn;
 
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 0507dfaf7a8f..3f108ab17797 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -28,7 +28,7 @@
 
 #define MMAP_PAGE_CNT	16
 
-static bool stop;
+static volatile bool stop;
 
 struct event_ring_info {
 	int fd;
@@ -44,32 +44,44 @@ struct perf_event_sample {
 	unsigned char data[];
 };
 
+struct perf_event_lost {
+	struct perf_event_header header;
+	__u64 id;
+	__u64 lost;
+};
+
 static void int_exit(int signo)
 {
 	fprintf(stderr, "Stopping...\n");
 	stop = true;
 }
 
+struct event_pipe_ctx {
+	bool all_cpus;
+	int cpu;
+	int idx;
+};
+
 static enum bpf_perf_event_ret
-print_bpf_output(struct perf_event_header *event, void *private_data)
+print_bpf_output(void *private_data, int cpu, struct perf_event_header *event)
 {
-	struct perf_event_sample *e = container_of(event, struct perf_event_sample,
+	struct perf_event_sample *e = container_of(event,
+						   struct perf_event_sample,
 						   header);
-	struct event_ring_info *ring = private_data;
-	struct {
-		struct perf_event_header header;
-		__u64 id;
-		__u64 lost;
-	} *lost = (typeof(lost))event;
+	struct perf_event_lost *lost = container_of(event,
+						    struct perf_event_lost,
+						    header);
+	struct event_pipe_ctx *ctx = private_data;
+	int idx = ctx->all_cpus ? cpu : ctx->idx;
 
 	if (json_output) {
 		jsonw_start_object(json_wtr);
 		jsonw_name(json_wtr, "type");
 		jsonw_uint(json_wtr, e->header.type);
 		jsonw_name(json_wtr, "cpu");
-		jsonw_uint(json_wtr, ring->cpu);
+		jsonw_uint(json_wtr, cpu);
 		jsonw_name(json_wtr, "index");
-		jsonw_uint(json_wtr, ring->key);
+		jsonw_uint(json_wtr, idx);
 		if (e->header.type == PERF_RECORD_SAMPLE) {
 			jsonw_name(json_wtr, "timestamp");
 			jsonw_uint(json_wtr, e->time);
@@ -89,7 +101,7 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 		if (e->header.type == PERF_RECORD_SAMPLE) {
 			printf("== @%lld.%09lld CPU: %d index: %d =====\n",
 			       e->time / 1000000000ULL, e->time % 1000000000ULL,
-			       ring->cpu, ring->key);
+			       cpu, idx);
 			fprint_hex(stdout, e->data, e->size, " ");
 			printf("\n");
 		} else if (e->header.type == PERF_RECORD_LOST) {
@@ -103,87 +115,25 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 	return LIBBPF_PERF_EVENT_CONT;
 }
 
-static void
-perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
-{
-	enum bpf_perf_event_ret ret;
-
-	ret = bpf_perf_event_read_simple(ring->mem,
-					 MMAP_PAGE_CNT * get_page_size(),
-					 get_page_size(), buf, buf_len,
-					 print_bpf_output, ring);
-	if (ret != LIBBPF_PERF_EVENT_CONT) {
-		fprintf(stderr, "perf read loop failed with %d\n", ret);
-		stop = true;
-	}
-}
-
-static int perf_mmap_size(void)
-{
-	return get_page_size() * (MMAP_PAGE_CNT + 1);
-}
-
-static void *perf_event_mmap(int fd)
-{
-	int mmap_size = perf_mmap_size();
-	void *base;
-
-	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	if (base == MAP_FAILED) {
-		p_err("event mmap failed: %s\n", strerror(errno));
-		return NULL;
-	}
-
-	return base;
-}
-
-static void perf_event_unmap(void *mem)
-{
-	if (munmap(mem, perf_mmap_size()))
-		fprintf(stderr, "Can't unmap ring memory!\n");
-}
-
-static int bpf_perf_event_open(int map_fd, int key, int cpu)
+int do_event_pipe(int argc, char **argv)
 {
-	struct perf_event_attr attr = {
+	struct perf_event_attr perf_attr = {
 		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
 		.type = PERF_TYPE_SOFTWARE,
 		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.sample_period = 1,
+		.wakeup_events = 1,
 	};
-	int pmu_fd;
-
-	pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
-	if (pmu_fd < 0) {
-		p_err("failed to open perf event %d for CPU %d", key, cpu);
-		return -1;
-	}
-
-	if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
-		p_err("failed to update map for event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-	if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
-		p_err("failed to enable event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-
-	return pmu_fd;
-
-err_close:
-	close(pmu_fd);
-	return -1;
-}
-
-int do_event_pipe(int argc, char **argv)
-{
-	int i, nfds, map_fd, index = -1, cpu = -1;
 	struct bpf_map_info map_info = {};
-	struct event_ring_info *rings;
-	size_t tmp_buf_sz = 0;
-	void *tmp_buf = NULL;
-	struct pollfd *pfds;
+	struct perf_buffer_raw_opts opts = {};
+	struct event_pipe_ctx ctx = {
+		.all_cpus = true,
+		.cpu = -1,
+		.idx = -1,
+	};
+	struct perf_buffer *pb;
 	__u32 map_info_len;
-	bool do_all = true;
+	int err, map_fd;
 
 	map_info_len = sizeof(map_info);
 	map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
@@ -205,7 +155,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			cpu = strtoul(*argv, &endptr, 0);
+			ctx.cpu = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as CPU ID", **argv);
 				goto err_close_map;
@@ -216,7 +166,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			index = strtoul(*argv, &endptr, 0);
+			ctx.idx = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as index", **argv);
 				goto err_close_map;
@@ -228,45 +178,32 @@ int do_event_pipe(int argc, char **argv)
 			goto err_close_map;
 		}
 
-		do_all = false;
+		ctx.all_cpus = false;
 	}
 
-	if (!do_all) {
-		if (index == -1 || cpu == -1) {
+	if (!ctx.all_cpus) {
+		if (ctx.idx == -1 || ctx.cpu == -1) {
 			p_err("cpu and index must be specified together");
 			goto err_close_map;
 		}
-
-		nfds = 1;
 	} else {
-		nfds = min(get_possible_cpus(), map_info.max_entries);
-		cpu = 0;
-		index = 0;
+		ctx.cpu = 0;
+		ctx.idx = 0;
 	}
 
-	rings = calloc(nfds, sizeof(rings[0]));
-	if (!rings)
+	opts.attr = &perf_attr;
+	opts.event_cb = print_bpf_output;
+	opts.ctx = &ctx;
+	opts.cpu_cnt = ctx.all_cpus ? 0 : 1;
+	opts.cpus = &ctx.cpu;
+	opts.map_keys = &ctx.idx;
+
+	pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts);
+	err = libbpf_get_error(pb);
+	if (err) {
+		p_err("failed to create perf buffer: %s (%d)",
+		      strerror(err), err);
 		goto err_close_map;
-
-	pfds = calloc(nfds, sizeof(pfds[0]));
-	if (!pfds)
-		goto err_free_rings;
-
-	for (i = 0; i < nfds; i++) {
-		rings[i].cpu = cpu + i;
-		rings[i].key = index + i;
-
-		rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
-						  rings[i].cpu);
-		if (rings[i].fd < 0)
-			goto err_close_fds_prev;
-
-		rings[i].mem = perf_event_mmap(rings[i].fd);
-		if (!rings[i].mem)
-			goto err_close_fds_current;
-
-		pfds[i].fd = rings[i].fd;
-		pfds[i].events = POLLIN;
 	}
 
 	signal(SIGINT, int_exit);
@@ -277,34 +214,24 @@ int do_event_pipe(int argc, char **argv)
 		jsonw_start_array(json_wtr);
 
 	while (!stop) {
-		poll(pfds, nfds, 200);
-		for (i = 0; i < nfds; i++)
-			perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+		err = perf_buffer__poll(pb, 200);
+		if (err < 0 && err != -EINTR) {
+			p_err("perf buffer polling failed: %s (%d)",
+			      strerror(err), err);
+			goto err_close_pb;
+		}
 	}
-	free(tmp_buf);
 
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
-	for (i = 0; i < nfds; i++) {
-		perf_event_unmap(rings[i].mem);
-		close(rings[i].fd);
-	}
-	free(pfds);
-	free(rings);
+	perf_buffer__free(pb);
 	close(map_fd);
 
 	return 0;
 
-err_close_fds_prev:
-	while (i--) {
-		perf_event_unmap(rings[i].mem);
-err_close_fds_current:
-		close(rings[i].fd);
-	}
-	free(pfds);
-err_free_rings:
-	free(rings);
+err_close_pb:
+	perf_buffer__free(pb);
 err_close_map:
 	close(map_fd);
 	return -1;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 7a4e21a31523..66f04a4846a5 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -15,6 +15,7 @@
 #include <sys/stat.h>
 
 #include <linux/err.h>
+#include <linux/sizes.h>
 
 #include <bpf.h>
 #include <btf.h>
@@ -748,12 +749,351 @@ static int do_detach(int argc, char **argv)
 	return 0;
 }
 
+static int check_single_stdin(char *file_data_in, char *file_ctx_in)
+{
+	if (file_data_in && file_ctx_in &&
+	    !strcmp(file_data_in, "-") && !strcmp(file_ctx_in, "-")) {
+		p_err("cannot use standard input for both data_in and ctx_in");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int get_run_data(const char *fname, void **data_ptr, unsigned int *size)
+{
+	size_t block_size = 256;
+	size_t buf_size = block_size;
+	size_t nb_read = 0;
+	void *tmp;
+	FILE *f;
+
+	if (!fname) {
+		*data_ptr = NULL;
+		*size = 0;
+		return 0;
+	}
+
+	if (!strcmp(fname, "-"))
+		f = stdin;
+	else
+		f = fopen(fname, "r");
+	if (!f) {
+		p_err("failed to open %s: %s", fname, strerror(errno));
+		return -1;
+	}
+
+	*data_ptr = malloc(block_size);
+	if (!*data_ptr) {
+		p_err("failed to allocate memory for data_in/ctx_in: %s",
+		      strerror(errno));
+		goto err_fclose;
+	}
+
+	while ((nb_read += fread(*data_ptr + nb_read, 1, block_size, f))) {
+		if (feof(f))
+			break;
+		if (ferror(f)) {
+			p_err("failed to read data_in/ctx_in from %s: %s",
+			      fname, strerror(errno));
+			goto err_free;
+		}
+		if (nb_read > buf_size - block_size) {
+			if (buf_size == UINT32_MAX) {
+				p_err("data_in/ctx_in is too long (max: %d)",
+				      UINT32_MAX);
+				goto err_free;
+			}
+			/* No space for fread()-ing next chunk; realloc() */
+			buf_size *= 2;
+			tmp = realloc(*data_ptr, buf_size);
+			if (!tmp) {
+				p_err("failed to reallocate data_in/ctx_in: %s",
+				      strerror(errno));
+				goto err_free;
+			}
+			*data_ptr = tmp;
+		}
+	}
+	if (f != stdin)
+		fclose(f);
+
+	*size = nb_read;
+	return 0;
+
+err_free:
+	free(*data_ptr);
+	*data_ptr = NULL;
+err_fclose:
+	if (f != stdin)
+		fclose(f);
+	return -1;
+}
+
+static void hex_print(void *data, unsigned int size, FILE *f)
+{
+	size_t i, j;
+	char c;
+
+	for (i = 0; i < size; i += 16) {
+		/* Row offset */
+		fprintf(f, "%07zx\t", i);
+
+		/* Hexadecimal values */
+		for (j = i; j < i + 16 && j < size; j++)
+			fprintf(f, "%02x%s", *(uint8_t *)(data + j),
+				j % 2 ? " " : "");
+		for (; j < i + 16; j++)
+			fprintf(f, "  %s", j % 2 ? " " : "");
+
+		/* ASCII values (if relevant), '.' otherwise */
+		fprintf(f, "| ");
+		for (j = i; j < i + 16 && j < size; j++) {
+			c = *(char *)(data + j);
+			if (c < ' ' || c > '~')
+				c = '.';
+			fprintf(f, "%c%s", c, j == i + 7 ? " " : "");
+		}
+
+		fprintf(f, "\n");
+	}
+}
+
+static int
+print_run_output(void *data, unsigned int size, const char *fname,
+		 const char *json_key)
+{
+	size_t nb_written;
+	FILE *f;
+
+	if (!fname)
+		return 0;
+
+	if (!strcmp(fname, "-")) {
+		f = stdout;
+		if (json_output) {
+			jsonw_name(json_wtr, json_key);
+			print_data_json(data, size);
+		} else {
+			hex_print(data, size, f);
+		}
+		return 0;
+	}
+
+	f = fopen(fname, "w");
+	if (!f) {
+		p_err("failed to open %s: %s", fname, strerror(errno));
+		return -1;
+	}
+
+	nb_written = fwrite(data, 1, size, f);
+	fclose(f);
+	if (nb_written != size) {
+		p_err("failed to write output data/ctx: %s", strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int alloc_run_data(void **data_ptr, unsigned int size_out)
+{
+	*data_ptr = calloc(size_out, 1);
+	if (!*data_ptr) {
+		p_err("failed to allocate memory for output data/ctx: %s",
+		      strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int do_run(int argc, char **argv)
+{
+	char *data_fname_in = NULL, *data_fname_out = NULL;
+	char *ctx_fname_in = NULL, *ctx_fname_out = NULL;
+	struct bpf_prog_test_run_attr test_attr = {0};
+	const unsigned int default_size = SZ_32K;
+	void *data_in = NULL, *data_out = NULL;
+	void *ctx_in = NULL, *ctx_out = NULL;
+	unsigned int repeat = 1;
+	int fd, err;
+
+	if (!REQ_ARGS(4))
+		return -1;
+
+	fd = prog_parse_fd(&argc, &argv);
+	if (fd < 0)
+		return -1;
+
+	while (argc) {
+		if (detect_common_prefix(*argv, "data_in", "data_out",
+					 "data_size_out", NULL))
+			return -1;
+		if (detect_common_prefix(*argv, "ctx_in", "ctx_out",
+					 "ctx_size_out", NULL))
+			return -1;
+
+		if (is_prefix(*argv, "data_in")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			data_fname_in = GET_ARG();
+			if (check_single_stdin(data_fname_in, ctx_fname_in))
+				return -1;
+		} else if (is_prefix(*argv, "data_out")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			data_fname_out = GET_ARG();
+		} else if (is_prefix(*argv, "data_size_out")) {
+			char *endptr;
+
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			test_attr.data_size_out = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as output data size",
+				      *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "ctx_in")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			ctx_fname_in = GET_ARG();
+			if (check_single_stdin(data_fname_in, ctx_fname_in))
+				return -1;
+		} else if (is_prefix(*argv, "ctx_out")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			ctx_fname_out = GET_ARG();
+		} else if (is_prefix(*argv, "ctx_size_out")) {
+			char *endptr;
+
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			test_attr.ctx_size_out = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as output context size",
+				      *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "repeat")) {
+			char *endptr;
+
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			repeat = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as repeat number",
+				      *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else {
+			p_err("expected no more arguments, 'data_in', 'data_out', 'data_size_out', 'ctx_in', 'ctx_out', 'ctx_size_out' or 'repeat', got: '%s'?",
+			      *argv);
+			return -1;
+		}
+	}
+
+	err = get_run_data(data_fname_in, &data_in, &test_attr.data_size_in);
+	if (err)
+		return -1;
+
+	if (data_in) {
+		if (!test_attr.data_size_out)
+			test_attr.data_size_out = default_size;
+		err = alloc_run_data(&data_out, test_attr.data_size_out);
+		if (err)
+			goto free_data_in;
+	}
+
+	err = get_run_data(ctx_fname_in, &ctx_in, &test_attr.ctx_size_in);
+	if (err)
+		goto free_data_out;
+
+	if (ctx_in) {
+		if (!test_attr.ctx_size_out)
+			test_attr.ctx_size_out = default_size;
+		err = alloc_run_data(&ctx_out, test_attr.ctx_size_out);
+		if (err)
+			goto free_ctx_in;
+	}
+
+	test_attr.prog_fd	= fd;
+	test_attr.repeat	= repeat;
+	test_attr.data_in	= data_in;
+	test_attr.data_out	= data_out;
+	test_attr.ctx_in	= ctx_in;
+	test_attr.ctx_out	= ctx_out;
+
+	err = bpf_prog_test_run_xattr(&test_attr);
+	if (err) {
+		p_err("failed to run program: %s", strerror(errno));
+		goto free_ctx_out;
+	}
+
+	err = 0;
+
+	if (json_output)
+		jsonw_start_object(json_wtr);	/* root */
+
+	/* Do not exit on errors occurring when printing output data/context,
+	 * we still want to print return value and duration for program run.
+	 */
+	if (test_attr.data_size_out)
+		err += print_run_output(test_attr.data_out,
+					test_attr.data_size_out,
+					data_fname_out, "data_out");
+	if (test_attr.ctx_size_out)
+		err += print_run_output(test_attr.ctx_out,
+					test_attr.ctx_size_out,
+					ctx_fname_out, "ctx_out");
+
+	if (json_output) {
+		jsonw_uint_field(json_wtr, "retval", test_attr.retval);
+		jsonw_uint_field(json_wtr, "duration", test_attr.duration);
+		jsonw_end_object(json_wtr);	/* root */
+	} else {
+		fprintf(stdout, "Return value: %u, duration%s: %uns\n",
+			test_attr.retval,
+			repeat > 1 ? " (average)" : "", test_attr.duration);
+	}
+
+free_ctx_out:
+	free(ctx_out);
+free_ctx_in:
+	free(ctx_in);
+free_data_out:
+	free(data_out);
+free_data_in:
+	free(data_in);
+
+	return err;
+}
+
 static int load_with_options(int argc, char **argv, bool first_prog_only)
 {
-	enum bpf_attach_type expected_attach_type;
-	struct bpf_object_open_attr attr = {
-		.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	struct bpf_object_load_attr load_attr = { 0 };
+	struct bpf_object_open_attr open_attr = {
+		.prog_type = BPF_PROG_TYPE_UNSPEC,
 	};
+	enum bpf_attach_type expected_attach_type;
 	struct map_replace *map_replace = NULL;
 	struct bpf_program *prog = NULL, *pos;
 	unsigned int old_map_fds = 0;
@@ -767,7 +1107,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
 	if (!REQ_ARGS(2))
 		return -1;
-	attr.file = GET_ARG();
+	open_attr.file = GET_ARG();
 	pinfile = GET_ARG();
 
 	while (argc) {
@@ -776,7 +1116,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
 			NEXT_ARG();
 
-			if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
+			if (open_attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
 				p_err("program type already specified");
 				goto err_free_reuse_maps;
 			}
@@ -793,7 +1133,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 			strcat(type, *argv);
 			strcat(type, "/");
 
-			err = libbpf_prog_type_by_name(type, &attr.prog_type,
+			err = libbpf_prog_type_by_name(type,
+						       &open_attr.prog_type,
 						       &expected_attach_type);
 			free(type);
 			if (err < 0)
@@ -881,16 +1222,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
 	set_max_rlimit();
 
-	obj = __bpf_object__open_xattr(&attr, bpf_flags);
+	obj = __bpf_object__open_xattr(&open_attr, bpf_flags);
 	if (IS_ERR_OR_NULL(obj)) {
 		p_err("failed to open object file");
 		goto err_free_reuse_maps;
 	}
 
 	bpf_object__for_each_program(pos, obj) {
-		enum bpf_prog_type prog_type = attr.prog_type;
+		enum bpf_prog_type prog_type = open_attr.prog_type;
 
-		if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
+		if (open_attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
 			const char *sec_name = bpf_program__title(pos, false);
 
 			err = libbpf_prog_type_by_name(sec_name, &prog_type,
@@ -960,7 +1301,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 		goto err_close_obj;
 	}
 
-	err = bpf_object__load(obj);
+	load_attr.obj = obj;
+	if (verifier_logs)
+		/* log_level1 + log_level2 + stats, but not stable UAPI */
+		load_attr.log_level = 1 + 2 + 4;
+
+	err = bpf_object__load_xattr(&load_attr);
 	if (err) {
 		p_err("failed to load object file");
 		goto err_close_obj;
@@ -1051,6 +1397,11 @@ static int do_help(int argc, char **argv)
 		"                         [pinmaps MAP_DIR]\n"
 		"       %s %s attach PROG ATTACH_TYPE [MAP]\n"
 		"       %s %s detach PROG ATTACH_TYPE [MAP]\n"
+		"       %s %s run PROG \\\n"
+		"                         data_in FILE \\\n"
+		"                         [data_out FILE [data_size_out L]] \\\n"
+		"                         [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
+		"                         [repeat N]\n"
 		"       %s %s tracelog\n"
 		"       %s %s help\n"
 		"\n"
@@ -1064,14 +1415,16 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 		"                 cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
-		"                 cgroup/recvmsg6 }\n"
+		"                 cgroup/recvmsg6 | cgroup/getsockopt |\n"
+		"                 cgroup/setsockopt }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2]);
 
 	return 0;
 }
@@ -1087,6 +1440,7 @@ static const struct cmd cmds[] = {
 	{ "attach",	do_attach },
 	{ "detach",	do_detach },
 	{ "tracelog",	do_tracelog },
+	{ "run",	do_run },
 	{ 0 }
 };
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 0bb17bf88b18..494d7ae3614d 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -31,9 +31,7 @@ void kernel_syms_load(struct dump_data *dd)
 	if (!fp)
 		return;
 
-	while (!feof(fp)) {
-		if (!fgets(buff, sizeof(buff), fp))
-			break;
+	while (fgets(buff, sizeof(buff), fp)) {
 		tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1,
 				   sizeof(*dd->sym_mapping));
 		if (!tmp) {
diff --git a/tools/include/linux/sizes.h b/tools/include/linux/sizes.h
new file mode 100644
index 000000000000..1cbb4c4d016e
--- /dev/null
+++ b/tools/include/linux/sizes.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/sizes.h
+ */
+#ifndef __LINUX_SIZES_H__
+#define __LINUX_SIZES_H__
+
+#include <linux/const.h>
+
+#define SZ_1				0x00000001
+#define SZ_2				0x00000002
+#define SZ_4				0x00000004
+#define SZ_8				0x00000008
+#define SZ_16				0x00000010
+#define SZ_32				0x00000020
+#define SZ_64				0x00000040
+#define SZ_128				0x00000080
+#define SZ_256				0x00000100
+#define SZ_512				0x00000200
+
+#define SZ_1K				0x00000400
+#define SZ_2K				0x00000800
+#define SZ_4K				0x00001000
+#define SZ_8K				0x00002000
+#define SZ_16K				0x00004000
+#define SZ_32K				0x00008000
+#define SZ_64K				0x00010000
+#define SZ_128K				0x00020000
+#define SZ_256K				0x00040000
+#define SZ_512K				0x00080000
+
+#define SZ_1M				0x00100000
+#define SZ_2M				0x00200000
+#define SZ_4M				0x00400000
+#define SZ_8M				0x00800000
+#define SZ_16M				0x01000000
+#define SZ_32M				0x02000000
+#define SZ_64M				0x04000000
+#define SZ_128M				0x08000000
+#define SZ_256M				0x10000000
+#define SZ_512M				0x20000000
+
+#define SZ_1G				0x40000000
+#define SZ_2G				0x80000000
+
+#define SZ_4G				_AC(0x100000000, ULL)
+
+#endif /* __LINUX_SIZES_H__ */
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
new file mode 100644
index 000000000000..77f7c1638eb1
--- /dev/null
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_SOCKET_H
+#define __ASM_GENERIC_SOCKET_H
+
+#include <linux/posix_types.h>
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+#define SO_REUSEPORT	15
+#ifndef SO_PASSCRED /* powerpc only differs in these */
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO_OLD	20
+#define SO_SNDTIMEO_OLD	21
+#endif
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE	25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER	26
+#define SO_DETACH_FILTER	27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
+
+#define SO_PEERNAME		28
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC		31
+#define SO_PASSSEC		34
+
+#define SO_MARK			36
+
+#define SO_PROTOCOL		38
+#define SO_DOMAIN		39
+
+#define SO_RXQ_OVFL             40
+
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS	SO_WIFI_STATUS
+#define SO_PEEK_OFF		42
+
+/* Instruct lower device to use last 4-bytes of skb data as FCS */
+#define SO_NOFCS		43
+
+#define SO_LOCK_FILTER		44
+
+#define SO_SELECT_ERR_QUEUE	45
+
+#define SO_BUSY_POLL		46
+
+#define SO_MAX_PACING_RATE	47
+
+#define SO_BPF_EXTENSIONS	48
+
+#define SO_INCOMING_CPU		49
+
+#define SO_ATTACH_BPF		50
+#define SO_DETACH_BPF		SO_DETACH_FILTER
+
+#define SO_ATTACH_REUSEPORT_CBPF	51
+#define SO_ATTACH_REUSEPORT_EBPF	52
+
+#define SO_CNX_ADVICE		53
+
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
+#define SO_MEMINFO		55
+
+#define SO_INCOMING_NAPI_ID	56
+
+#define SO_COOKIE		57
+
+#define SCM_TIMESTAMPING_PKTINFO	58
+
+#define SO_PEERGROUPS		59
+
+#define SO_ZEROCOPY		60
+
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
+#define SO_BINDTOIFINDEX	62
+
+#define SO_TIMESTAMP_OLD        29
+#define SO_TIMESTAMPNS_OLD      35
+#define SO_TIMESTAMPING_OLD     37
+
+#define SO_TIMESTAMP_NEW        63
+#define SO_TIMESTAMPNS_NEW      64
+#define SO_TIMESTAMPING_NEW     65
+
+#define SO_RCVTIMEO_NEW         66
+#define SO_SNDTIMEO_NEW         67
+
+#define SO_DETACH_REUSEPORT_BPF 68
+
+#if !defined(__KERNEL__)
+
+#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+/* on 64-bit and x32, avoid the ?: operator */
+#define SO_TIMESTAMP		SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
+
+#define SO_RCVTIMEO		SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO		SO_SNDTIMEO_OLD
+#else
+#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
+#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
+
+#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
+#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
+#endif
+
+#define SCM_TIMESTAMP           SO_TIMESTAMP
+#define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
+#define SCM_TIMESTAMPING        SO_TIMESTAMPING
+
+#endif
+
+#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 29a5bc3d5c66..402208581b2d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -170,6 +170,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+	BPF_PROG_TYPE_CGROUP_SOCKOPT,
 };
 
 enum bpf_attach_type {
@@ -194,6 +195,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_SYSCTL,
 	BPF_CGROUP_UDP4_RECVMSG,
 	BPF_CGROUP_UDP6_RECVMSG,
+	BPF_CGROUP_GETSOCKOPT,
+	BPF_CGROUP_SETSOCKOPT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -262,6 +265,24 @@ enum bpf_attach_type {
  */
 #define BPF_F_ANY_ALIGNMENT	(1U << 1)
 
+/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
+ * Verifier does sub-register def/use analysis and identifies instructions whose
+ * def only matters for low 32-bit, high 32-bit is never referenced later
+ * through implicit zero extension. Therefore verifier notifies JIT back-ends
+ * that it is safe to ignore clearing high 32-bit for these instructions. This
+ * saves some back-ends a lot of code-gen. However such optimization is not
+ * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
+ * hence hasn't used verifier's analysis result. But, we really want to have a
+ * way to be able to verify the correctness of the described optimization on
+ * x86_64 on which testsuites are frequently exercised.
+ *
+ * So, this flag is introduced. Once it is set, verifier will randomize high
+ * 32-bit for those instructions who has been identified as safe to ignore them.
+ * Then, if verifier is not doing correct analysis, such randomization will
+ * regress tests to expose bugs.
+ */
+#define BPF_F_TEST_RND_HI32	(1U << 2)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
@@ -1746,6 +1767,7 @@ union bpf_attr {
  * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
  * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
  * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * 		* **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
  *
  * 		Therefore, this function can be used to clear a callback flag by
  * 		setting the appropriate bit to zero. e.g. to disable the RTO
@@ -2674,6 +2696,20 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf-local-storage cannot be found.
+ *
+ * int bpf_send_signal(u32 sig)
+ *	Description
+ *		Send signal *sig* to the current task.
+ *	Return
+ *		0 on success or successfully queued.
+ *
+ *		**-EBUSY** if work queue under nmi is full.
+ *
+ *		**-EINVAL** if *sig* is invalid.
+ *
+ *		**-EPERM** if no permission to send the *sig*.
+ *
+ *		**-EAGAIN** if bpf program can try again.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2784,7 +2820,8 @@ union bpf_attr {
 	FN(strtol),			\
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
-	FN(sk_storage_delete),
+	FN(sk_storage_delete),		\
+	FN(send_signal),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3033,6 +3070,12 @@ struct bpf_tcp_sock {
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
 				 */
+	__u32 dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
+				 * total number of DSACK blocks received
+				 */
+	__u32 delivered;	/* Total data packets delivered incl. rexmits */
+	__u32 delivered_ce;	/* Like the above but only ECE marked packets */
+	__u32 icsk_retransmits;	/* Number of unrecovered [RTO] timeouts */
 };
 
 struct bpf_sock_tuple {
@@ -3052,6 +3095,10 @@ struct bpf_sock_tuple {
 	};
 };
 
+struct bpf_xdp_sock {
+	__u32 queue_id;
+};
+
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
@@ -3198,7 +3245,7 @@ struct bpf_sock_addr {
 	__u32 user_ip4;		/* Allows 1,2,4-byte read and 4-byte write.
 				 * Stored in network byte order.
 				 */
-	__u32 user_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+	__u32 user_ip6[4];	/* Allows 1,2,4-byte read and 4,8-byte write.
 				 * Stored in network byte order.
 				 */
 	__u32 user_port;	/* Allows 4-byte read and write.
@@ -3207,12 +3254,13 @@ struct bpf_sock_addr {
 	__u32 family;		/* Allows 4-byte read, but no write */
 	__u32 type;		/* Allows 4-byte read, but no write */
 	__u32 protocol;		/* Allows 4-byte read, but no write */
-	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read an 4-byte write.
+	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read and 4-byte write.
 				 * Stored in network byte order.
 				 */
-	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read and 4,8-byte write.
 				 * Stored in network byte order.
 				 */
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -3264,13 +3312,15 @@ struct bpf_sock_ops {
 	__u32 sk_txhash;
 	__u64 bytes_received;
 	__u64 bytes_acked;
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 /* Definitions for bpf_sock_ops_cb_flags */
 #define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 #define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
 #define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
+#define BPF_SOCK_OPS_RTT_CB_FLAG	(1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0xF		/* Mask of all currently
 							 * supported cb flags
 							 */
 
@@ -3325,6 +3375,8 @@ enum {
 	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
 					 * socket transition to LISTEN state.
 					 */
+	BPF_SOCK_OPS_RTT_CB,		/* Called on every RTT.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
@@ -3503,4 +3555,15 @@ struct bpf_sysctl {
 				 */
 };
 
+struct bpf_sockopt {
+	__bpf_md_ptr(struct bpf_sock *, sk);
+	__bpf_md_ptr(void *, optval);
+	__bpf_md_ptr(void *, optval_end);
+
+	__s32	level;
+	__s32	optname;
+	__s32	optlen;
+	__s32	retval;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 5b225ff63b48..7d113a9602f0 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -636,6 +636,7 @@ enum {
 	IFLA_BOND_AD_USER_PORT_KEY,
 	IFLA_BOND_AD_ACTOR_SYSTEM,
 	IFLA_BOND_TLB_DYNAMIC_LB,
+	IFLA_BOND_PEER_NOTIF_DELAY,
 	__IFLA_BOND_MAX,
 };
 
diff --git a/tools/include/uapi/linux/if_tun.h b/tools/include/uapi/linux/if_tun.h
new file mode 100644
index 000000000000..454ae31b93c7
--- /dev/null
+++ b/tools/include/uapi/linux/if_tun.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  Universal TUN/TAP device driver.
+ *  Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ */
+
+#ifndef _UAPI__IF_TUN_H
+#define _UAPI__IF_TUN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/filter.h>
+
+/* Read queue size */
+#define TUN_READQ_SIZE	500
+/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */
+#define TUN_TUN_DEV 	IFF_TUN
+#define TUN_TAP_DEV	IFF_TAP
+#define TUN_TYPE_MASK   0x000f
+
+/* Ioctl defines */
+#define TUNSETNOCSUM  _IOW('T', 200, int) 
+#define TUNSETDEBUG   _IOW('T', 201, int) 
+#define TUNSETIFF     _IOW('T', 202, int) 
+#define TUNSETPERSIST _IOW('T', 203, int) 
+#define TUNSETOWNER   _IOW('T', 204, int)
+#define TUNSETLINK    _IOW('T', 205, int)
+#define TUNSETGROUP   _IOW('T', 206, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
+#define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
+#define TUNSETTXFILTER _IOW('T', 209, unsigned int)
+#define TUNGETIFF      _IOR('T', 210, unsigned int)
+#define TUNGETSNDBUF   _IOR('T', 211, int)
+#define TUNSETSNDBUF   _IOW('T', 212, int)
+#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
+#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
+#define TUNGETVNETHDRSZ _IOR('T', 215, int)
+#define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNSETQUEUE  _IOW('T', 217, int)
+#define TUNSETIFINDEX	_IOW('T', 218, unsigned int)
+#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNGETVNETLE _IOR('T', 221, int)
+/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on
+ * little-endian hosts. Not all kernel configurations support them, but all
+ * configurations that support SET also support GET.
+ */
+#define TUNSETVNETBE _IOW('T', 222, int)
+#define TUNGETVNETBE _IOR('T', 223, int)
+#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
+#define TUNSETFILTEREBPF _IOR('T', 225, int)
+#define TUNSETCARRIER _IOW('T', 226, int)
+#define TUNGETDEVNETNS _IO('T', 227)
+
+/* TUNSETIFF ifr flags */
+#define IFF_TUN		0x0001
+#define IFF_TAP		0x0002
+#define IFF_NAPI	0x0010
+#define IFF_NAPI_FRAGS	0x0020
+#define IFF_NO_PI	0x1000
+/* This flag has no real effect */
+#define IFF_ONE_QUEUE	0x2000
+#define IFF_VNET_HDR	0x4000
+#define IFF_TUN_EXCL	0x8000
+#define IFF_MULTI_QUEUE 0x0100
+#define IFF_ATTACH_QUEUE 0x0200
+#define IFF_DETACH_QUEUE 0x0400
+/* read-only flag */
+#define IFF_PERSIST	0x0800
+#define IFF_NOFILTER	0x1000
+
+/* Socket options */
+#define TUN_TX_TIMESTAMP 1
+
+/* Features for GSO (TUNSETOFFLOAD). */
+#define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */
+#define TUN_F_TSO4	0x02	/* I can handle TSO for IPv4 packets */
+#define TUN_F_TSO6	0x04	/* I can handle TSO for IPv6 packets */
+#define TUN_F_TSO_ECN	0x08	/* I can handle TSO with ECN bits. */
+#define TUN_F_UFO	0x10	/* I can handle UFO packets */
+
+/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
+#define TUN_PKT_STRIP	0x0001
+struct tun_pi {
+	__u16  flags;
+	__be16 proto;
+};
+
+/*
+ * Filter spec (used for SETXXFILTER ioctls)
+ * This stuff is applicable only to the TAP (Ethernet) devices.
+ * If the count is zero the filter is disabled and the driver accepts
+ * all packets (promisc mode).
+ * If the filter is enabled in order to accept broadcast packets
+ * broadcast addr must be explicitly included in the addr list.
+ */
+#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */
+struct tun_filter {
+	__u16  flags; /* TUN_FLT_ flags see above */
+	__u16  count; /* Number of addresses */
+	__u8   addr[0][ETH_ALEN];
+};
+
+#endif /* _UAPI__IF_TUN_H */
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index caed8b1614ff..faaa5ca2a117 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -46,6 +46,7 @@ struct xdp_mmap_offsets {
 #define XDP_UMEM_FILL_RING		5
 #define XDP_UMEM_COMPLETION_RING	6
 #define XDP_STATISTICS			7
+#define XDP_OPTIONS			8
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
@@ -60,6 +61,13 @@ struct xdp_statistics {
 	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
 };
 
+struct xdp_options {
+	__u32 flags;
+};
+
+/* Flags for the flags field of struct xdp_options */
+#define XDP_OPTIONS_ZEROCOPY (1 << 0)
+
 /* Pgoff for mmaping the rings */
 #define XDP_PGOFF_RX_RING			  0
 #define XDP_PGOFF_TX_RING		 0x80000000
diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h
index 401d0c1e612d..12153771396a 100644
--- a/tools/include/uapi/linux/pkt_cls.h
+++ b/tools/include/uapi/linux/pkt_cls.h
@@ -257,7 +257,7 @@ enum {
 	TCA_FW_UNSPEC,
 	TCA_FW_CLASSID,
 	TCA_FW_POLICE,
-	TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
+	TCA_FW_INDEV,
 	TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
 	TCA_FW_MASK,
 	__TCA_FW_MAX
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index ee9d5362f35b..e3962cfbc9a6 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1,3 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
+	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
+	    btf_dump.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index f91639bf5650..9312066a1ae3 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -3,7 +3,7 @@
 
 BPF_VERSION = 0
 BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 3
+BPF_EXTRAVERSION = 4
 
 MAKEFLAGS += --no-print-directory
 
@@ -204,6 +204,16 @@ check_abi: $(OUTPUT)libbpf.so
 		     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
 		     "Please make sure all LIBBPF_API symbols are"	 \
 		     "versioned in $(VERSION_SCRIPT)." >&2;		 \
+		readelf -s --wide $(OUTPUT)libbpf-in.o |		 \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'|   \
+		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
+		readelf -s --wide $(OUTPUT)libbpf.so |			 \
+		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
+		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
+		diff -u $(OUTPUT)libbpf_global_syms.tmp			 \
+		     $(OUTPUT)libbpf_versioned_syms.tmp;		 \
+		rm $(OUTPUT)libbpf_global_syms.tmp			 \
+		   $(OUTPUT)libbpf_versioned_syms.tmp;			 \
 		exit 1;							 \
 	fi
 
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
index cef7b77eab69..8928f7787f2d 100644
--- a/tools/lib/bpf/README.rst
+++ b/tools/lib/bpf/README.rst
@@ -9,7 +9,8 @@ described here. It's recommended to follow these conventions whenever a
 new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
+``perf_buffer_``.
 
 System call wrappers
 --------------------
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c4a48086dc9a..c7d7993c44bb 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -26,10 +26,11 @@
 #include <memory.h>
 #include <unistd.h>
 #include <asm/unistd.h>
+#include <errno.h>
 #include <linux/bpf.h>
 #include "bpf.h"
 #include "libbpf.h"
-#include <errno.h>
+#include "libbpf_internal.h"
 
 /*
  * When building perf, unistd.h is overridden. __NR_bpf is
@@ -53,10 +54,6 @@
 # endif
 #endif
 
-#ifndef min
-#define min(x, y) ((x) < (y) ? (x) : (y))
-#endif
-
 static inline __u64 ptr_to_u64(const void *ptr)
 {
 	return (__u64) (unsigned long) ptr;
@@ -256,6 +253,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	if (load_attr->name)
 		memcpy(attr.prog_name, load_attr->name,
 		       min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.prog_flags = load_attr->prog_flags;
 
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 	if (fd >= 0)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9593fec75652..ff42ca043dc8 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -87,6 +87,7 @@ struct bpf_load_program_attr {
 	const void *line_info;
 	__u32 line_info_cnt;
 	__u32 log_level;
+	__u32 prog_flags;
 };
 
 /* Flags to direct loading requirements */
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index 6978314ea7f6..8c67561c93b0 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -6,10 +6,7 @@
 #include <linux/err.h>
 #include <linux/bpf.h>
 #include "libbpf.h"
-
-#ifndef min
-#define min(x, y) ((x) < (y) ? (x) : (y))
-#endif
+#include "libbpf_internal.h"
 
 struct bpf_prog_linfo {
 	void *raw_linfo;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 03348c4d6bd4..467224feb43b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4,17 +4,17 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include <linux/err.h>
 #include <linux/btf.h>
+#include <gelf.h>
 #include "btf.h"
 #include "bpf.h"
 #include "libbpf.h"
 #include "libbpf_internal.h"
-
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define min(a, b) ((a) < (b) ? (a) : (b))
+#include "hashmap.h"
 
 #define BTF_MAX_NR_TYPES 0x7fffffff
 #define BTF_MAX_STR_OFFSET 0x7fffffff
@@ -417,6 +417,132 @@ done:
 	return btf;
 }
 
+static bool btf_check_endianness(const GElf_Ehdr *ehdr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+}
+
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+	Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
+	int err = 0, fd = -1, idx = 0;
+	struct btf *btf = NULL;
+	Elf_Scn *scn = NULL;
+	Elf *elf = NULL;
+	GElf_Ehdr ehdr;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_warning("failed to init libelf for %s\n", path);
+		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		err = -errno;
+		pr_warning("failed to open %s: %s\n", path, strerror(errno));
+		return ERR_PTR(err);
+	}
+
+	err = -LIBBPF_ERRNO__FORMAT;
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		pr_warning("failed to open %s as ELF file\n", path);
+		goto done;
+	}
+	if (!gelf_getehdr(elf, &ehdr)) {
+		pr_warning("failed to get EHDR from %s\n", path);
+		goto done;
+	}
+	if (!btf_check_endianness(&ehdr)) {
+		pr_warning("non-native ELF endianness is not supported\n");
+		goto done;
+	}
+	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+		pr_warning("failed to get e_shstrndx from %s\n", path);
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		GElf_Shdr sh;
+		char *name;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			pr_warning("failed to get section(%d) header from %s\n",
+				   idx, path);
+			goto done;
+		}
+		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!name) {
+			pr_warning("failed to get section(%d) name from %s\n",
+				   idx, path);
+			goto done;
+		}
+		if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = elf_getdata(scn, 0);
+			if (!btf_data) {
+				pr_warning("failed to get section(%d, %s) data from %s\n",
+					   idx, name, path);
+				goto done;
+			}
+			continue;
+		} else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+			btf_ext_data = elf_getdata(scn, 0);
+			if (!btf_ext_data) {
+				pr_warning("failed to get section(%d, %s) data from %s\n",
+					   idx, name, path);
+				goto done;
+			}
+			continue;
+		}
+	}
+
+	err = 0;
+
+	if (!btf_data) {
+		err = -ENOENT;
+		goto done;
+	}
+	btf = btf__new(btf_data->d_buf, btf_data->d_size);
+	if (IS_ERR(btf))
+		goto done;
+
+	if (btf_ext && btf_ext_data) {
+		*btf_ext = btf_ext__new(btf_ext_data->d_buf,
+					btf_ext_data->d_size);
+		if (IS_ERR(*btf_ext))
+			goto done;
+	} else if (btf_ext) {
+		*btf_ext = NULL;
+	}
+done:
+	if (elf)
+		elf_end(elf);
+	close(fd);
+
+	if (err)
+		return ERR_PTR(err);
+	/*
+	 * btf is always parsed before btf_ext, so no need to clean up
+	 * btf_ext, if btf loading failed
+	 */
+	if (IS_ERR(btf))
+		return btf;
+	if (btf_ext && IS_ERR(*btf_ext)) {
+		btf__free(btf);
+		err = PTR_ERR(*btf_ext);
+		return ERR_PTR(err);
+	}
+	return btf;
+}
+
 static int compare_vsi_off(const void *_a, const void *_b)
 {
 	const struct btf_var_secinfo *a = _a;
@@ -1165,16 +1291,9 @@ done:
 	return err;
 }
 
-#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
-#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31
 #define BTF_UNPROCESSED_ID ((__u32)-1)
 #define BTF_IN_PROGRESS_ID ((__u32)-2)
 
-struct btf_dedup_node {
-	struct btf_dedup_node *next;
-	__u32 type_id;
-};
-
 struct btf_dedup {
 	/* .BTF section to be deduped in-place */
 	struct btf *btf;
@@ -1190,7 +1309,7 @@ struct btf_dedup {
 	 * candidates, which is fine because we rely on subsequent
 	 * btf_xxx_equal() checks to authoritatively verify type equality.
 	 */
-	struct btf_dedup_node **dedup_table;
+	struct hashmap *dedup_table;
 	/* Canonical types map */
 	__u32 *map;
 	/* Hypothetical mapping, used during type graph equivalence checks */
@@ -1215,30 +1334,18 @@ struct btf_str_ptrs {
 	__u32 cap;
 };
 
-static inline __u32 hash_combine(__u32 h, __u32 value)
+static long hash_combine(long h, long value)
 {
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define GOLDEN_RATIO_PRIME 0x9e370001UL
-	return h * 37 + value * GOLDEN_RATIO_PRIME;
-#undef GOLDEN_RATIO_PRIME
+	return h * 31 + value;
 }
 
-#define for_each_dedup_cand(d, hash, node) \
-	for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \
-	     node;							   \
-	     node = node->next)
+#define for_each_dedup_cand(d, node, hash) \
+	hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
 
-static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
+static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
 {
-	struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
-	int bucket = hash & (d->opts.dedup_table_size - 1);
-
-	if (!node)
-		return -ENOMEM;
-	node->type_id = type_id;
-	node->next = d->dedup_table[bucket];
-	d->dedup_table[bucket] = node;
-	return 0;
+	return hashmap__append(d->dedup_table,
+			       (void *)hash, (void *)(long)type_id);
 }
 
 static int btf_dedup_hypot_map_add(struct btf_dedup *d,
@@ -1267,36 +1374,10 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
 	d->hypot_cnt = 0;
 }
 
-static void btf_dedup_table_free(struct btf_dedup *d)
-{
-	struct btf_dedup_node *head, *tmp;
-	int i;
-
-	if (!d->dedup_table)
-		return;
-
-	for (i = 0; i < d->opts.dedup_table_size; i++) {
-		while (d->dedup_table[i]) {
-			tmp = d->dedup_table[i];
-			d->dedup_table[i] = tmp->next;
-			free(tmp);
-		}
-
-		head = d->dedup_table[i];
-		while (head) {
-			tmp = head;
-			head = head->next;
-			free(tmp);
-		}
-	}
-
-	free(d->dedup_table);
-	d->dedup_table = NULL;
-}
-
 static void btf_dedup_free(struct btf_dedup *d)
 {
-	btf_dedup_table_free(d);
+	hashmap__free(d->dedup_table);
+	d->dedup_table = NULL;
 
 	free(d->map);
 	d->map = NULL;
@@ -1310,40 +1391,43 @@ static void btf_dedup_free(struct btf_dedup *d)
 	free(d);
 }
 
-/* Find closest power of two >= to size, capped at 2^max_size_log */
-static __u32 roundup_pow2_max(__u32 size, int max_size_log)
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
 {
-	int i;
+	return (size_t)key;
+}
 
-	for (i = 0; i < max_size_log  && (1U << i) < size;  i++)
-		;
-	return 1U << i;
+static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
+{
+	return 0;
 }
 
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+	return k1 == k2;
+}
 
 static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 				       const struct btf_dedup_opts *opts)
 {
 	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
+	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
 	int i, err = 0;
-	__u32 sz;
 
 	if (!d)
 		return ERR_PTR(-ENOMEM);
 
 	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
-	sz = opts && opts->dedup_table_size ? opts->dedup_table_size
-					    : BTF_DEDUP_TABLE_DEFAULT_SIZE;
-	sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG);
-	d->opts.dedup_table_size = sz;
+	/* dedup_table_size is now used only to force collisions in tests */
+	if (opts && opts->dedup_table_size == 1)
+		hash_fn = btf_dedup_collision_hash_fn;
 
 	d->btf = btf;
 	d->btf_ext = btf_ext;
 
-	d->dedup_table = calloc(d->opts.dedup_table_size,
-				sizeof(struct btf_dedup_node *));
-	if (!d->dedup_table) {
-		err = -ENOMEM;
+	d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(d->dedup_table)) {
+		err = PTR_ERR(d->dedup_table);
+		d->dedup_table = NULL;
 		goto done;
 	}
 
@@ -1662,9 +1746,9 @@ done:
 	return err;
 }
 
-static __u32 btf_hash_common(struct btf_type *t)
+static long btf_hash_common(struct btf_type *t)
 {
-	__u32 h;
+	long h;
 
 	h = hash_combine(0, t->name_off);
 	h = hash_combine(h, t->info);
@@ -1680,10 +1764,10 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
 }
 
 /* Calculate type signature hash of INT. */
-static __u32 btf_hash_int(struct btf_type *t)
+static long btf_hash_int(struct btf_type *t)
 {
 	__u32 info = *(__u32 *)(t + 1);
-	__u32 h;
+	long h;
 
 	h = btf_hash_common(t);
 	h = hash_combine(h, info);
@@ -1703,9 +1787,9 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
 }
 
 /* Calculate type signature hash of ENUM. */
-static __u32 btf_hash_enum(struct btf_type *t)
+static long btf_hash_enum(struct btf_type *t)
 {
-	__u32 h;
+	long h;
 
 	/* don't hash vlen and enum members to support enum fwd resolving */
 	h = hash_combine(0, t->name_off);
@@ -1757,11 +1841,11 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
  * as referenced type IDs equivalence is established separately during type
  * graph equivalence check algorithm.
  */
-static __u32 btf_hash_struct(struct btf_type *t)
+static long btf_hash_struct(struct btf_type *t)
 {
 	struct btf_member *member = (struct btf_member *)(t + 1);
 	__u32 vlen = BTF_INFO_VLEN(t->info);
-	__u32 h = btf_hash_common(t);
+	long h = btf_hash_common(t);
 	int i;
 
 	for (i = 0; i < vlen; i++) {
@@ -1804,10 +1888,10 @@ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
  * under assumption that they were already resolved to canonical type IDs and
  * are not going to change.
  */
-static __u32 btf_hash_array(struct btf_type *t)
+static long btf_hash_array(struct btf_type *t)
 {
 	struct btf_array *info = (struct btf_array *)(t + 1);
-	__u32 h = btf_hash_common(t);
+	long h = btf_hash_common(t);
 
 	h = hash_combine(h, info->type);
 	h = hash_combine(h, info->index_type);
@@ -1858,11 +1942,11 @@ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
  * under assumption that they were already resolved to canonical type IDs and
  * are not going to change.
  */
-static inline __u32 btf_hash_fnproto(struct btf_type *t)
+static long btf_hash_fnproto(struct btf_type *t)
 {
 	struct btf_param *member = (struct btf_param *)(t + 1);
 	__u16 vlen = BTF_INFO_VLEN(t->info);
-	__u32 h = btf_hash_common(t);
+	long h = btf_hash_common(t);
 	int i;
 
 	for (i = 0; i < vlen; i++) {
@@ -1880,7 +1964,7 @@ static inline __u32 btf_hash_fnproto(struct btf_type *t)
  * This function is called during reference types deduplication to compare
  * FUNC_PROTO to potential canonical representative.
  */
-static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
+static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
 {
 	struct btf_param *m1, *m2;
 	__u16 vlen;
@@ -1906,7 +1990,7 @@ static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
  * IDs. This check is performed during type graph equivalence check and
  * referenced types equivalence is checked separately.
  */
-static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
+static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
 {
 	struct btf_param *m1, *m2;
 	__u16 vlen;
@@ -1937,11 +2021,12 @@ static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
 static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 {
 	struct btf_type *t = d->btf->types[type_id];
+	struct hashmap_entry *hash_entry;
 	struct btf_type *cand;
-	struct btf_dedup_node *cand_node;
 	/* if we don't find equivalent type, then we are canonical */
 	__u32 new_id = type_id;
-	__u32 h;
+	__u32 cand_id;
+	long h;
 
 	switch (BTF_INFO_KIND(t->info)) {
 	case BTF_KIND_CONST:
@@ -1960,10 +2045,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 
 	case BTF_KIND_INT:
 		h = btf_hash_int(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_int(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -1971,10 +2057,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 
 	case BTF_KIND_ENUM:
 		h = btf_hash_enum(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_enum(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 			if (d->opts.dont_resolve_fwds)
@@ -1982,21 +2069,22 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 			if (btf_compat_enum(t, cand)) {
 				if (btf_is_enum_fwd(t)) {
 					/* resolve fwd to full enum */
-					new_id = cand_node->type_id;
+					new_id = cand_id;
 					break;
 				}
 				/* resolve canonical enum fwd to full enum */
-				d->map[cand_node->type_id] = type_id;
+				d->map[cand_id] = type_id;
 			}
 		}
 		break;
 
 	case BTF_KIND_FWD:
 		h = btf_hash_common(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_common(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2397,12 +2485,12 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
  */
 static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 {
-	struct btf_dedup_node *cand_node;
 	struct btf_type *cand_type, *t;
+	struct hashmap_entry *hash_entry;
 	/* if we don't find equivalent type, then we are canonical */
 	__u32 new_id = type_id;
 	__u16 kind;
-	__u32 h;
+	long h;
 
 	/* already deduped or is in process of deduping (loop detected) */
 	if (d->map[type_id] <= BTF_MAX_NR_TYPES)
@@ -2415,7 +2503,8 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 		return 0;
 
 	h = btf_hash_struct(t);
-	for_each_dedup_cand(d, h, cand_node) {
+	for_each_dedup_cand(d, hash_entry, h) {
+		__u32 cand_id = (__u32)(long)hash_entry->value;
 		int eq;
 
 		/*
@@ -2428,17 +2517,17 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 		 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
 		 * FWD and compatible STRUCT/UNION are considered equivalent.
 		 */
-		cand_type = d->btf->types[cand_node->type_id];
+		cand_type = d->btf->types[cand_id];
 		if (!btf_shallow_equal_struct(t, cand_type))
 			continue;
 
 		btf_dedup_clear_hypot_map(d);
-		eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id);
+		eq = btf_dedup_is_equiv(d, type_id, cand_id);
 		if (eq < 0)
 			return eq;
 		if (!eq)
 			continue;
-		new_id = cand_node->type_id;
+		new_id = cand_id;
 		btf_dedup_merge_hypot_map(d);
 		break;
 	}
@@ -2488,12 +2577,12 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
  */
 static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 {
-	struct btf_dedup_node *cand_node;
+	struct hashmap_entry *hash_entry;
+	__u32 new_id = type_id, cand_id;
 	struct btf_type *t, *cand;
 	/* if we don't find equivalent type, then we are representative type */
-	__u32 new_id = type_id;
 	int ref_type_id;
-	__u32 h;
+	long h;
 
 	if (d->map[type_id] == BTF_IN_PROGRESS_ID)
 		return -ELOOP;
@@ -2516,10 +2605,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		t->type = ref_type_id;
 
 		h = btf_hash_common(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_common(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2539,10 +2629,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		info->index_type = ref_type_id;
 
 		h = btf_hash_array(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_array(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2570,10 +2661,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		}
 
 		h = btf_hash_fnproto(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_fnproto(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2600,7 +2692,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
 		if (err < 0)
 			return err;
 	}
-	btf_dedup_table_free(d);
+	/* we won't need d->dedup_table anymore */
+	hashmap__free(d->dedup_table);
+	d->dedup_table = NULL;
 	return 0;
 }
 
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index c7b399e81fce..88a52ae56fc6 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -4,6 +4,7 @@
 #ifndef __LIBBPF_BTF_H
 #define __LIBBPF_BTF_H
 
+#include <stdarg.h>
 #include <linux/types.h>
 
 #ifdef __cplusplus
@@ -16,6 +17,7 @@ extern "C" {
 
 #define BTF_ELF_SEC ".BTF"
 #define BTF_EXT_ELF_SEC ".BTF.ext"
+#define MAPS_ELF_SEC ".maps"
 
 struct btf;
 struct btf_ext;
@@ -59,6 +61,8 @@ struct btf_ext_header {
 
 LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf *btf__parse_elf(const char *path,
+				      struct btf_ext **btf_ext);
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
@@ -100,6 +104,22 @@ struct btf_dedup_opts {
 LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 			  const struct btf_dedup_opts *opts);
 
+struct btf_dump;
+
+struct btf_dump_opts {
+	void *ctx;
+};
+
+typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
+
+LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
+					  const struct btf_ext *btf_ext,
+					  const struct btf_dump_opts *opts,
+					  btf_dump_printf_fn_t printf_fn);
+LIBBPF_API void btf_dump__free(struct btf_dump *d);
+
+LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
new file mode 100644
index 000000000000..7065bb5b2752
--- /dev/null
+++ b/tools/lib/bpf/btf_dump.c
@@ -0,0 +1,1333 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C type converter.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "hashmap.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
+static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
+
+static const char *pfx(int lvl)
+{
+	return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl];
+}
+
+enum btf_dump_type_order_state {
+	NOT_ORDERED,
+	ORDERING,
+	ORDERED,
+};
+
+enum btf_dump_type_emit_state {
+	NOT_EMITTED,
+	EMITTING,
+	EMITTED,
+};
+
+/* per-type auxiliary state */
+struct btf_dump_type_aux_state {
+	/* topological sorting state */
+	enum btf_dump_type_order_state order_state: 2;
+	/* emitting state used to determine the need for forward declaration */
+	enum btf_dump_type_emit_state emit_state: 2;
+	/* whether forward declaration was already emitted */
+	__u8 fwd_emitted: 1;
+	/* whether unique non-duplicate name was already assigned */
+	__u8 name_resolved: 1;
+};
+
+struct btf_dump {
+	const struct btf *btf;
+	const struct btf_ext *btf_ext;
+	btf_dump_printf_fn_t printf_fn;
+	struct btf_dump_opts opts;
+
+	/* per-type auxiliary state */
+	struct btf_dump_type_aux_state *type_states;
+	/* per-type optional cached unique name, must be freed, if present */
+	const char **cached_names;
+
+	/* topo-sorted list of dependent type definitions */
+	__u32 *emit_queue;
+	int emit_queue_cap;
+	int emit_queue_cnt;
+
+	/*
+	 * stack of type declarations (e.g., chain of modifiers, arrays,
+	 * funcs, etc)
+	 */
+	__u32 *decl_stack;
+	int decl_stack_cap;
+	int decl_stack_cnt;
+
+	/* maps struct/union/enum name to a number of name occurrences */
+	struct hashmap *type_names;
+	/*
+	 * maps typedef identifiers and enum value names to a number of such
+	 * name occurrences
+	 */
+	struct hashmap *ident_names;
+};
+
+static size_t str_hash_fn(const void *key, void *ctx)
+{
+	const char *s = key;
+	size_t h = 0;
+
+	while (*s) {
+		h = h * 31 + *s;
+		s++;
+	}
+	return h;
+}
+
+static bool str_equal_fn(const void *a, const void *b, void *ctx)
+{
+	return strcmp(a, b) == 0;
+}
+
+static __u16 btf_kind_of(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info);
+}
+
+static __u16 btf_vlen_of(const struct btf_type *t)
+{
+	return BTF_INFO_VLEN(t->info);
+}
+
+static bool btf_kflag_of(const struct btf_type *t)
+{
+	return BTF_INFO_KFLAG(t->info);
+}
+
+static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
+{
+	return btf__name_by_offset(d->btf, name_off);
+}
+
+static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	d->printf_fn(d->opts.ctx, fmt, args);
+	va_end(args);
+}
+
+struct btf_dump *btf_dump__new(const struct btf *btf,
+			       const struct btf_ext *btf_ext,
+			       const struct btf_dump_opts *opts,
+			       btf_dump_printf_fn_t printf_fn)
+{
+	struct btf_dump *d;
+	int err;
+
+	d = calloc(1, sizeof(struct btf_dump));
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+	d->printf_fn = printf_fn;
+	d->opts.ctx = opts ? opts->ctx : NULL;
+
+	d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->type_names)) {
+		err = PTR_ERR(d->type_names);
+		d->type_names = NULL;
+		btf_dump__free(d);
+		return ERR_PTR(err);
+	}
+	d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->ident_names)) {
+		err = PTR_ERR(d->ident_names);
+		d->ident_names = NULL;
+		btf_dump__free(d);
+		return ERR_PTR(err);
+	}
+
+	return d;
+}
+
+void btf_dump__free(struct btf_dump *d)
+{
+	int i, cnt;
+
+	if (!d)
+		return;
+
+	free(d->type_states);
+	if (d->cached_names) {
+		/* any set cached name is owned by us and should be freed */
+		for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) {
+			if (d->cached_names[i])
+				free((void *)d->cached_names[i]);
+		}
+	}
+	free(d->cached_names);
+	free(d->emit_queue);
+	free(d->decl_stack);
+	hashmap__free(d->type_names);
+	hashmap__free(d->ident_names);
+
+	free(d);
+}
+
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
+
+/*
+ * Dump BTF type in a compilable C syntax, including all the necessary
+ * dependent types, necessary for compilation. If some of the dependent types
+ * were already emitted as part of previous btf_dump__dump_type() invocation
+ * for another type, they won't be emitted again. This API allows callers to
+ * filter out BTF types according to user-defined criterias and emitted only
+ * minimal subset of types, necessary to compile everything. Full struct/union
+ * definitions will still be emitted, even if the only usage is through
+ * pointer and could be satisfied with just a forward declaration.
+ *
+ * Dumping is done in two high-level passes:
+ *   1. Topologically sort type definitions to satisfy C rules of compilation.
+ *   2. Emit type definitions in C syntax.
+ *
+ * Returns 0 on success; <0, otherwise.
+ */
+int btf_dump__dump_type(struct btf_dump *d, __u32 id)
+{
+	int err, i;
+
+	if (id > btf__get_nr_types(d->btf))
+		return -EINVAL;
+
+	/* type states are lazily allocated, as they might not be needed */
+	if (!d->type_states) {
+		d->type_states = calloc(1 + btf__get_nr_types(d->btf),
+					sizeof(d->type_states[0]));
+		if (!d->type_states)
+			return -ENOMEM;
+		d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
+					 sizeof(d->cached_names[0]));
+		if (!d->cached_names)
+			return -ENOMEM;
+
+		/* VOID is special */
+		d->type_states[0].order_state = ORDERED;
+		d->type_states[0].emit_state = EMITTED;
+	}
+
+	d->emit_queue_cnt = 0;
+	err = btf_dump_order_type(d, id, false);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < d->emit_queue_cnt; i++)
+		btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
+
+	return 0;
+}
+
+static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
+{
+	__u32 *new_queue;
+	size_t new_cap;
+
+	if (d->emit_queue_cnt >= d->emit_queue_cap) {
+		new_cap = max(16, d->emit_queue_cap * 3 / 2);
+		new_queue = realloc(d->emit_queue,
+				    new_cap * sizeof(new_queue[0]));
+		if (!new_queue)
+			return -ENOMEM;
+		d->emit_queue = new_queue;
+		d->emit_queue_cap = new_cap;
+	}
+
+	d->emit_queue[d->emit_queue_cnt++] = id;
+	return 0;
+}
+
+/*
+ * Determine order of emitting dependent types and specified type to satisfy
+ * C compilation rules.  This is done through topological sorting with an
+ * additional complication which comes from C rules. The main idea for C is
+ * that if some type is "embedded" into a struct/union, it's size needs to be
+ * known at the time of definition of containing type. E.g., for:
+ *
+ *	struct A {};
+ *	struct B { struct A x; }
+ *
+ * struct A *HAS* to be defined before struct B, because it's "embedded",
+ * i.e., it is part of struct B layout. But in the following case:
+ *
+ *	struct A;
+ *	struct B { struct A *x; }
+ *	struct A {};
+ *
+ * it's enough to just have a forward declaration of struct A at the time of
+ * struct B definition, as struct B has a pointer to struct A, so the size of
+ * field x is known without knowing struct A size: it's sizeof(void *).
+ *
+ * Unfortunately, there are some trickier cases we need to handle, e.g.:
+ *
+ *	struct A {}; // if this was forward-declaration: compilation error
+ *	struct B {
+ *		struct { // anonymous struct
+ *			struct A y;
+ *		} *x;
+ *	};
+ *
+ * In this case, struct B's field x is a pointer, so it's size is known
+ * regardless of the size of (anonymous) struct it points to. But because this
+ * struct is anonymous and thus defined inline inside struct B, *and* it
+ * embeds struct A, compiler requires full definition of struct A to be known
+ * before struct B can be defined. This creates a transitive dependency
+ * between struct A and struct B. If struct A was forward-declared before
+ * struct B definition and fully defined after struct B definition, that would
+ * trigger compilation error.
+ *
+ * All this means that while we are doing topological sorting on BTF type
+ * graph, we need to determine relationships between different types (graph
+ * nodes):
+ *   - weak link (relationship) between X and Y, if Y *CAN* be
+ *   forward-declared at the point of X definition;
+ *   - strong link, if Y *HAS* to be fully-defined before X can be defined.
+ *
+ * The rule is as follows. Given a chain of BTF types from X to Y, if there is
+ * BTF_KIND_PTR type in the chain and at least one non-anonymous type
+ * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
+ * Weak/strong relationship is determined recursively during DFS traversal and
+ * is returned as a result from btf_dump_order_type().
+ *
+ * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
+ * but it is not guaranteeing that no extraneous forward declarations will be
+ * emitted.
+ *
+ * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
+ * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
+ * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
+ * entire graph path, so depending where from one came to that BTF type, it
+ * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
+ * once they are processed, there is no need to do it again, so they are
+ * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
+ * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
+ * in any case, once those are processed, no need to do it again, as the
+ * result won't change.
+ *
+ * Returns:
+ *   - 1, if type is part of strong link (so there is strong topological
+ *   ordering requirements);
+ *   - 0, if type is part of weak link (so can be satisfied through forward
+ *   declaration);
+ *   - <0, on error (e.g., unsatisfiable type loop detected).
+ */
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
+{
+	/*
+	 * Order state is used to detect strong link cycles, but only for BTF
+	 * kinds that are or could be an independent definition (i.e.,
+	 * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays,
+	 * func_protos, modifiers are just means to get to these definitions.
+	 * Int/void don't need definitions, they are assumed to be always
+	 * properly defined.  We also ignore datasec, var, and funcs for now.
+	 * So for all non-defining kinds, we never even set ordering state,
+	 * for defining kinds we set ORDERING and subsequently ORDERED if it
+	 * forms a strong link.
+	 */
+	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+	const struct btf_type *t;
+	__u16 kind, vlen;
+	int err, i;
+
+	/* return true, letting typedefs know that it's ok to be emitted */
+	if (tstate->order_state == ORDERED)
+		return 1;
+
+	t = btf__type_by_id(d->btf, id);
+	kind = btf_kind_of(t);
+
+	if (tstate->order_state == ORDERING) {
+		/* type loop, but resolvable through fwd declaration */
+		if ((kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) &&
+		    through_ptr && t->name_off != 0)
+			return 0;
+		pr_warning("unsatisfiable type cycle, id:[%u]\n", id);
+		return -ELOOP;
+	}
+
+	switch (kind) {
+	case BTF_KIND_INT:
+		tstate->order_state = ORDERED;
+		return 0;
+
+	case BTF_KIND_PTR:
+		err = btf_dump_order_type(d, t->type, true);
+		tstate->order_state = ORDERED;
+		return err;
+
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *a = (void *)(t + 1);
+
+		return btf_dump_order_type(d, a->type, through_ptr);
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = (void *)(t + 1);
+		/*
+		 * struct/union is part of strong link, only if it's embedded
+		 * (so no ptr in a path) or it's anonymous (so has to be
+		 * defined inline, even if declared through ptr)
+		 */
+		if (through_ptr && t->name_off != 0)
+			return 0;
+
+		tstate->order_state = ORDERING;
+
+		vlen = btf_vlen_of(t);
+		for (i = 0; i < vlen; i++, m++) {
+			err = btf_dump_order_type(d, m->type, false);
+			if (err < 0)
+				return err;
+		}
+
+		if (t->name_off != 0) {
+			err = btf_dump_add_emit_queue_id(d, id);
+			if (err < 0)
+				return err;
+		}
+
+		tstate->order_state = ORDERED;
+		return 1;
+	}
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+		if (t->name_off != 0) {
+			err = btf_dump_add_emit_queue_id(d, id);
+			if (err)
+				return err;
+		}
+		tstate->order_state = ORDERED;
+		return 1;
+
+	case BTF_KIND_TYPEDEF: {
+		int is_strong;
+
+		is_strong = btf_dump_order_type(d, t->type, through_ptr);
+		if (is_strong < 0)
+			return is_strong;
+
+		/* typedef is similar to struct/union w.r.t. fwd-decls */
+		if (through_ptr && !is_strong)
+			return 0;
+
+		/* typedef is always a named definition */
+		err = btf_dump_add_emit_queue_id(d, id);
+		if (err)
+			return err;
+
+		d->type_states[id].order_state = ORDERED;
+		return 1;
+	}
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf_dump_order_type(d, t->type, through_ptr);
+
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = (void *)(t + 1);
+		bool is_strong;
+
+		err = btf_dump_order_type(d, t->type, through_ptr);
+		if (err < 0)
+			return err;
+		is_strong = err > 0;
+
+		vlen = btf_vlen_of(t);
+		for (i = 0; i < vlen; i++, p++) {
+			err = btf_dump_order_type(d, p->type, through_ptr);
+			if (err < 0)
+				return err;
+			if (err > 0)
+				is_strong = true;
+		}
+		return is_strong;
+	}
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+	case BTF_KIND_DATASEC:
+		d->type_states[id].order_state = ORDERED;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t);
+static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t);
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+				  const struct btf_type *t);
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+				      const struct btf_type *t, int lvl);
+
+/* a local view into a shared stack */
+struct id_stack {
+	const __u32 *ids;
+	int cnt;
+};
+
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+				    const char *fname, int lvl);
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+				     struct id_stack *decl_stack,
+				     const char *fname, int lvl);
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id);
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+				 const char *orig_name);
+
+static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+
+	/* __builtin_va_list is a compiler built-in, which causes compilation
+	 * errors, when compiling w/ different compiler, then used to compile
+	 * original code (e.g., GCC to compile kernel, Clang to use generated
+	 * C header from BTF). As it is built-in, it should be already defined
+	 * properly internally in compiler.
+	 */
+	if (t->name_off == 0)
+		return false;
+	return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
+}
+
+/*
+ * Emit C-syntax definitions of types from chains of BTF types.
+ *
+ * High-level handling of determining necessary forward declarations are handled
+ * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
+ * declarations/definitions in C syntax  are handled by a combo of
+ * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
+ * corresponding btf_dump_emit_*_{def,fwd}() functions.
+ *
+ * We also keep track of "containing struct/union type ID" to determine when
+ * we reference it from inside and thus can avoid emitting unnecessary forward
+ * declaration.
+ *
+ * This algorithm is designed in such a way, that even if some error occurs
+ * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
+ * that doesn't comply to C rules completely), algorithm will try to proceed
+ * and produce as much meaningful output as possible.
+ */
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
+{
+	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+	bool top_level_def = cont_id == 0;
+	const struct btf_type *t;
+	__u16 kind;
+
+	if (tstate->emit_state == EMITTED)
+		return;
+
+	t = btf__type_by_id(d->btf, id);
+	kind = btf_kind_of(t);
+
+	if (top_level_def && t->name_off == 0) {
+		pr_warning("unexpected nameless definition, id:[%u]\n", id);
+		return;
+	}
+
+	if (tstate->emit_state == EMITTING) {
+		if (tstate->fwd_emitted)
+			return;
+
+		switch (kind) {
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			/*
+			 * if we are referencing a struct/union that we are
+			 * part of - then no need for fwd declaration
+			 */
+			if (id == cont_id)
+				return;
+			if (t->name_off == 0) {
+				pr_warning("anonymous struct/union loop, id:[%u]\n",
+					   id);
+				return;
+			}
+			btf_dump_emit_struct_fwd(d, id, t);
+			btf_dump_printf(d, ";\n\n");
+			tstate->fwd_emitted = 1;
+			break;
+		case BTF_KIND_TYPEDEF:
+			/*
+			 * for typedef fwd_emitted means typedef definition
+			 * was emitted, but it can be used only for "weak"
+			 * references through pointer only, not for embedding
+			 */
+			if (!btf_dump_is_blacklisted(d, id)) {
+				btf_dump_emit_typedef_def(d, id, t, 0);
+				btf_dump_printf(d, ";\n\n");
+			};
+			tstate->fwd_emitted = 1;
+			break;
+		default:
+			break;
+		}
+
+		return;
+	}
+
+	switch (kind) {
+	case BTF_KIND_INT:
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_ENUM:
+		if (top_level_def) {
+			btf_dump_emit_enum_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+		}
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_PTR:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		btf_dump_emit_type(d, t->type, cont_id);
+		break;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *a = (void *)(t + 1);
+
+		btf_dump_emit_type(d, a->type, cont_id);
+		break;
+	}
+	case BTF_KIND_FWD:
+		btf_dump_emit_fwd_def(d, id, t);
+		btf_dump_printf(d, ";\n\n");
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_TYPEDEF:
+		tstate->emit_state = EMITTING;
+		btf_dump_emit_type(d, t->type, id);
+		/*
+		 * typedef can server as both definition and forward
+		 * declaration; at this stage someone depends on
+		 * typedef as a forward declaration (refers to it
+		 * through pointer), so unless we already did it,
+		 * emit typedef as a forward declaration
+		 */
+		if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
+			btf_dump_emit_typedef_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+		}
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		tstate->emit_state = EMITTING;
+		/* if it's a top-level struct/union definition or struct/union
+		 * is anonymous, then in C we'll be emitting all fields and
+		 * their types (as opposed to just `struct X`), so we need to
+		 * make sure that all types, referenced from struct/union
+		 * members have necessary forward-declarations, where
+		 * applicable
+		 */
+		if (top_level_def || t->name_off == 0) {
+			const struct btf_member *m = (void *)(t + 1);
+			__u16 vlen = btf_vlen_of(t);
+			int i, new_cont_id;
+
+			new_cont_id = t->name_off == 0 ? cont_id : id;
+			for (i = 0; i < vlen; i++, m++)
+				btf_dump_emit_type(d, m->type, new_cont_id);
+		} else if (!tstate->fwd_emitted && id != cont_id) {
+			btf_dump_emit_struct_fwd(d, id, t);
+			btf_dump_printf(d, ";\n\n");
+			tstate->fwd_emitted = 1;
+		}
+
+		if (top_level_def) {
+			btf_dump_emit_struct_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+			tstate->emit_state = EMITTED;
+		} else {
+			tstate->emit_state = NOT_EMITTED;
+		}
+		break;
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = (void *)(t + 1);
+		__u16 vlen = btf_vlen_of(t);
+		int i;
+
+		btf_dump_emit_type(d, t->type, cont_id);
+		for (i = 0; i < vlen; i++, p++)
+			btf_dump_emit_type(d, p->type, cont_id);
+
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+static int btf_align_of(const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+	__u16 kind = btf_kind_of(t);
+
+	switch (kind) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+		return min(sizeof(void *), t->size);
+	case BTF_KIND_PTR:
+		return sizeof(void *);
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf_align_of(btf, t->type);
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *a = (void *)(t + 1);
+
+		return btf_align_of(btf, a->type);
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = (void *)(t + 1);
+		__u16 vlen = btf_vlen_of(t);
+		int i, align = 1;
+
+		for (i = 0; i < vlen; i++, m++)
+			align = max(align, btf_align_of(btf, m->type));
+
+		return align;
+	}
+	default:
+		pr_warning("unsupported BTF_KIND:%u\n", btf_kind_of(t));
+		return 1;
+	}
+}
+
+static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+				 const struct btf_type *t)
+{
+	const struct btf_member *m;
+	int align, i, bit_sz;
+	__u16 vlen;
+	bool kflag;
+
+	align = btf_align_of(btf, id);
+	/* size of a non-packed struct has to be a multiple of its alignment*/
+	if (t->size % align)
+		return true;
+
+	m = (void *)(t + 1);
+	kflag = btf_kflag_of(t);
+	vlen = btf_vlen_of(t);
+	/* all non-bitfield fields have to be naturally aligned */
+	for (i = 0; i < vlen; i++, m++) {
+		align = btf_align_of(btf, m->type);
+		bit_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
+		if (bit_sz == 0 && m->offset % (8 * align) != 0)
+			return true;
+	}
+
+	/*
+	 * if original struct was marked as packed, but its layout is
+	 * naturally aligned, we'll detect that it's not packed
+	 */
+	return false;
+}
+
+static int chip_away_bits(int total, int at_most)
+{
+	return total % at_most ? : at_most;
+}
+
+static void btf_dump_emit_bit_padding(const struct btf_dump *d,
+				      int cur_off, int m_off, int m_bit_sz,
+				      int align, int lvl)
+{
+	int off_diff = m_off - cur_off;
+	int ptr_bits = sizeof(void *) * 8;
+
+	if (off_diff <= 0)
+		/* no gap */
+		return;
+	if (m_bit_sz == 0 && off_diff < align * 8)
+		/* natural padding will take care of a gap */
+		return;
+
+	while (off_diff > 0) {
+		const char *pad_type;
+		int pad_bits;
+
+		if (ptr_bits > 32 && off_diff > 32) {
+			pad_type = "long";
+			pad_bits = chip_away_bits(off_diff, ptr_bits);
+		} else if (off_diff > 16) {
+			pad_type = "int";
+			pad_bits = chip_away_bits(off_diff, 32);
+		} else if (off_diff > 8) {
+			pad_type = "short";
+			pad_bits = chip_away_bits(off_diff, 16);
+		} else {
+			pad_type = "char";
+			pad_bits = chip_away_bits(off_diff, 8);
+		}
+		btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+		off_diff -= pad_bits;
+	}
+}
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t)
+{
+	btf_dump_printf(d, "%s %s",
+			btf_kind_of(t) == BTF_KIND_STRUCT ? "struct" : "union",
+			btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_struct_def(struct btf_dump *d,
+				     __u32 id,
+				     const struct btf_type *t,
+				     int lvl)
+{
+	const struct btf_member *m = (void *)(t + 1);
+	bool kflag = btf_kflag_of(t), is_struct;
+	int align, i, packed, off = 0;
+	__u16 vlen = btf_vlen_of(t);
+
+	is_struct = btf_kind_of(t) == BTF_KIND_STRUCT;
+	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
+	align = packed ? 1 : btf_align_of(d->btf, id);
+
+	btf_dump_printf(d, "%s%s%s {",
+			is_struct ? "struct" : "union",
+			t->name_off ? " " : "",
+			btf_dump_type_name(d, id));
+
+	for (i = 0; i < vlen; i++, m++) {
+		const char *fname;
+		int m_off, m_sz;
+
+		fname = btf_name_of(d, m->name_off);
+		m_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
+		m_off = kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset;
+		align = packed ? 1 : btf_align_of(d->btf, m->type);
+
+		btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
+		btf_dump_printf(d, "\n%s", pfx(lvl + 1));
+		btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
+
+		if (m_sz) {
+			btf_dump_printf(d, ": %d", m_sz);
+			off = m_off + m_sz;
+		} else {
+			m_sz = max(0, btf__resolve_size(d->btf, m->type));
+			off = m_off + m_sz * 8;
+		}
+		btf_dump_printf(d, ";");
+	}
+
+	if (vlen)
+		btf_dump_printf(d, "\n");
+	btf_dump_printf(d, "%s}", pfx(lvl));
+	if (packed)
+		btf_dump_printf(d, " __attribute__((packed))");
+}
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t)
+{
+	btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t,
+				   int lvl)
+{
+	const struct btf_enum *v = (void *)(t+1);
+	__u16 vlen = btf_vlen_of(t);
+	const char *name;
+	size_t dup_cnt;
+	int i;
+
+	btf_dump_printf(d, "enum%s%s",
+			t->name_off ? " " : "",
+			btf_dump_type_name(d, id));
+
+	if (vlen) {
+		btf_dump_printf(d, " {");
+		for (i = 0; i < vlen; i++, v++) {
+			name = btf_name_of(d, v->name_off);
+			/* enumerators share namespace with typedef idents */
+			dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+			if (dup_cnt > 1) {
+				btf_dump_printf(d, "\n%s%s___%zu = %d,",
+						pfx(lvl + 1), name, dup_cnt,
+						(__s32)v->val);
+			} else {
+				btf_dump_printf(d, "\n%s%s = %d,",
+						pfx(lvl + 1), name,
+						(__s32)v->val);
+			}
+		}
+		btf_dump_printf(d, "\n%s}", pfx(lvl));
+	}
+}
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+				  const struct btf_type *t)
+{
+	const char *name = btf_dump_type_name(d, id);
+
+	if (btf_kflag_of(t))
+		btf_dump_printf(d, "union %s", name);
+	else
+		btf_dump_printf(d, "struct %s", name);
+}
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t, int lvl)
+{
+	const char *name = btf_dump_ident_name(d, id);
+
+	btf_dump_printf(d, "typedef ");
+	btf_dump_emit_type_decl(d, t->type, name, lvl);
+}
+
+static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
+{
+	__u32 *new_stack;
+	size_t new_cap;
+
+	if (d->decl_stack_cnt >= d->decl_stack_cap) {
+		new_cap = max(16, d->decl_stack_cap * 3 / 2);
+		new_stack = realloc(d->decl_stack,
+				    new_cap * sizeof(new_stack[0]));
+		if (!new_stack)
+			return -ENOMEM;
+		d->decl_stack = new_stack;
+		d->decl_stack_cap = new_cap;
+	}
+
+	d->decl_stack[d->decl_stack_cnt++] = id;
+
+	return 0;
+}
+
+/*
+ * Emit type declaration (e.g., field type declaration in a struct or argument
+ * declaration in function prototype) in correct C syntax.
+ *
+ * For most types it's trivial, but there are few quirky type declaration
+ * cases worth mentioning:
+ *   - function prototypes (especially nesting of function prototypes);
+ *   - arrays;
+ *   - const/volatile/restrict for pointers vs other types.
+ *
+ * For a good discussion of *PARSING* C syntax (as a human), see
+ * Peter van der Linden's "Expert C Programming: Deep C Secrets",
+ * Ch.3 "Unscrambling Declarations in C".
+ *
+ * It won't help with BTF to C conversion much, though, as it's an opposite
+ * problem. So we came up with this algorithm in reverse to van der Linden's
+ * parsing algorithm. It goes from structured BTF representation of type
+ * declaration to a valid compilable C syntax.
+ *
+ * For instance, consider this C typedef:
+ *	typedef const int * const * arr[10] arr_t;
+ * It will be represented in BTF with this chain of BTF types:
+ *	[typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int]
+ *
+ * Notice how [const] modifier always goes before type it modifies in BTF type
+ * graph, but in C syntax, const/volatile/restrict modifiers are written to
+ * the right of pointers, but to the left of other types. There are also other
+ * quirks, like function pointers, arrays of them, functions returning other
+ * functions, etc.
+ *
+ * We handle that by pushing all the types to a stack, until we hit "terminal"
+ * type (int/enum/struct/union/fwd). Then depending on the kind of a type on
+ * top of a stack, modifiers are handled differently. Array/function pointers
+ * have also wildly different syntax and how nesting of them are done. See
+ * code for authoritative definition.
+ *
+ * To avoid allocating new stack for each independent chain of BTF types, we
+ * share one bigger stack, with each chain working only on its own local view
+ * of a stack frame. Some care is required to "pop" stack frames after
+ * processing type declaration chain.
+ */
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+				    const char *fname, int lvl)
+{
+	struct id_stack decl_stack;
+	const struct btf_type *t;
+	int err, stack_start;
+	__u16 kind;
+
+	stack_start = d->decl_stack_cnt;
+	for (;;) {
+		err = btf_dump_push_decl_stack_id(d, id);
+		if (err < 0) {
+			/*
+			 * if we don't have enough memory for entire type decl
+			 * chain, restore stack, emit warning, and try to
+			 * proceed nevertheless
+			 */
+			pr_warning("not enough memory for decl stack:%d", err);
+			d->decl_stack_cnt = stack_start;
+			return;
+		}
+
+		/* VOID */
+		if (id == 0)
+			break;
+
+		t = btf__type_by_id(d->btf, id);
+		kind = btf_kind_of(t);
+		switch (kind) {
+		case BTF_KIND_PTR:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_FUNC_PROTO:
+			id = t->type;
+			break;
+		case BTF_KIND_ARRAY: {
+			const struct btf_array *a = (void *)(t + 1);
+
+			id = a->type;
+			break;
+		}
+		case BTF_KIND_INT:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_FWD:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_TYPEDEF:
+			goto done;
+		default:
+			pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
+				   kind, id);
+			goto done;
+		}
+	}
+done:
+	/*
+	 * We might be inside a chain of declarations (e.g., array of function
+	 * pointers returning anonymous (so inlined) structs, having another
+	 * array field). Each of those needs its own "stack frame" to handle
+	 * emitting of declarations. Those stack frames are non-overlapping
+	 * portions of shared btf_dump->decl_stack. To make it a bit nicer to
+	 * handle this set of nested stacks, we create a view corresponding to
+	 * our own "stack frame" and work with it as an independent stack.
+	 * We'll need to clean up after emit_type_chain() returns, though.
+	 */
+	decl_stack.ids = d->decl_stack + stack_start;
+	decl_stack.cnt = d->decl_stack_cnt - stack_start;
+	btf_dump_emit_type_chain(d, &decl_stack, fname, lvl);
+	/*
+	 * emit_type_chain() guarantees that it will pop its entire decl_stack
+	 * frame before returning. But it works with a read-only view into
+	 * decl_stack, so it doesn't actually pop anything from the
+	 * perspective of shared btf_dump->decl_stack, per se. We need to
+	 * reset decl_stack state to how it was before us to avoid it growing
+	 * all the time.
+	 */
+	d->decl_stack_cnt = stack_start;
+}
+
+static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
+{
+	const struct btf_type *t;
+	__u32 id;
+
+	while (decl_stack->cnt) {
+		id = decl_stack->ids[decl_stack->cnt - 1];
+		t = btf__type_by_id(d->btf, id);
+
+		switch (btf_kind_of(t)) {
+		case BTF_KIND_VOLATILE:
+			btf_dump_printf(d, "volatile ");
+			break;
+		case BTF_KIND_CONST:
+			btf_dump_printf(d, "const ");
+			break;
+		case BTF_KIND_RESTRICT:
+			btf_dump_printf(d, "restrict ");
+			break;
+		default:
+			return;
+		}
+		decl_stack->cnt--;
+	}
+}
+
+static bool btf_is_mod_kind(const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+
+	switch (btf_kind_of(t)) {
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void btf_dump_emit_name(const struct btf_dump *d,
+			       const char *name, bool last_was_ptr)
+{
+	bool separate = name[0] && !last_was_ptr;
+
+	btf_dump_printf(d, "%s%s", separate ? " " : "", name);
+}
+
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+				     struct id_stack *decls,
+				     const char *fname, int lvl)
+{
+	/*
+	 * last_was_ptr is used to determine if we need to separate pointer
+	 * asterisk (*) from previous part of type signature with space, so
+	 * that we get `int ***`, instead of `int * * *`. We default to true
+	 * for cases where we have single pointer in a chain. E.g., in ptr ->
+	 * func_proto case. func_proto will start a new emit_type_chain call
+	 * with just ptr, which should be emitted as (*) or (*<fname>), so we
+	 * don't want to prepend space for that last pointer.
+	 */
+	bool last_was_ptr = true;
+	const struct btf_type *t;
+	const char *name;
+	__u16 kind;
+	__u32 id;
+
+	while (decls->cnt) {
+		id = decls->ids[--decls->cnt];
+		if (id == 0) {
+			/* VOID is a special snowflake */
+			btf_dump_emit_mods(d, decls);
+			btf_dump_printf(d, "void");
+			last_was_ptr = false;
+			continue;
+		}
+
+		t = btf__type_by_id(d->btf, id);
+		kind = btf_kind_of(t);
+
+		switch (kind) {
+		case BTF_KIND_INT:
+			btf_dump_emit_mods(d, decls);
+			name = btf_name_of(d, t->name_off);
+			btf_dump_printf(d, "%s", name);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			btf_dump_emit_mods(d, decls);
+			/* inline anonymous struct/union */
+			if (t->name_off == 0)
+				btf_dump_emit_struct_def(d, id, t, lvl);
+			else
+				btf_dump_emit_struct_fwd(d, id, t);
+			break;
+		case BTF_KIND_ENUM:
+			btf_dump_emit_mods(d, decls);
+			/* inline anonymous enum */
+			if (t->name_off == 0)
+				btf_dump_emit_enum_def(d, id, t, lvl);
+			else
+				btf_dump_emit_enum_fwd(d, id, t);
+			break;
+		case BTF_KIND_FWD:
+			btf_dump_emit_mods(d, decls);
+			btf_dump_emit_fwd_def(d, id, t);
+			break;
+		case BTF_KIND_TYPEDEF:
+			btf_dump_emit_mods(d, decls);
+			btf_dump_printf(d, "%s", btf_dump_ident_name(d, id));
+			break;
+		case BTF_KIND_PTR:
+			btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *");
+			break;
+		case BTF_KIND_VOLATILE:
+			btf_dump_printf(d, " volatile");
+			break;
+		case BTF_KIND_CONST:
+			btf_dump_printf(d, " const");
+			break;
+		case BTF_KIND_RESTRICT:
+			btf_dump_printf(d, " restrict");
+			break;
+		case BTF_KIND_ARRAY: {
+			const struct btf_array *a = (void *)(t + 1);
+			const struct btf_type *next_t;
+			__u32 next_id;
+			bool multidim;
+			/*
+			 * GCC has a bug
+			 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354)
+			 * which causes it to emit extra const/volatile
+			 * modifiers for an array, if array's element type has
+			 * const/volatile modifiers. Clang doesn't do that.
+			 * In general, it doesn't seem very meaningful to have
+			 * a const/volatile modifier for array, so we are
+			 * going to silently skip them here.
+			 */
+			while (decls->cnt) {
+				next_id = decls->ids[decls->cnt - 1];
+				if (btf_is_mod_kind(d->btf, next_id))
+					decls->cnt--;
+				else
+					break;
+			}
+
+			if (decls->cnt == 0) {
+				btf_dump_emit_name(d, fname, last_was_ptr);
+				btf_dump_printf(d, "[%u]", a->nelems);
+				return;
+			}
+
+			next_t = btf__type_by_id(d->btf, next_id);
+			multidim = btf_kind_of(next_t) == BTF_KIND_ARRAY;
+			/* we need space if we have named non-pointer */
+			if (fname[0] && !last_was_ptr)
+				btf_dump_printf(d, " ");
+			/* no parentheses for multi-dimensional array */
+			if (!multidim)
+				btf_dump_printf(d, "(");
+			btf_dump_emit_type_chain(d, decls, fname, lvl);
+			if (!multidim)
+				btf_dump_printf(d, ")");
+			btf_dump_printf(d, "[%u]", a->nelems);
+			return;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			const struct btf_param *p = (void *)(t + 1);
+			__u16 vlen = btf_vlen_of(t);
+			int i;
+
+			btf_dump_emit_mods(d, decls);
+			if (decls->cnt) {
+				btf_dump_printf(d, " (");
+				btf_dump_emit_type_chain(d, decls, fname, lvl);
+				btf_dump_printf(d, ")");
+			} else {
+				btf_dump_emit_name(d, fname, last_was_ptr);
+			}
+			btf_dump_printf(d, "(");
+			/*
+			 * Clang for BPF target generates func_proto with no
+			 * args as a func_proto with a single void arg (e.g.,
+			 * `int (*f)(void)` vs just `int (*f)()`). We are
+			 * going to pretend there are no args for such case.
+			 */
+			if (vlen == 1 && p->type == 0) {
+				btf_dump_printf(d, ")");
+				return;
+			}
+
+			for (i = 0; i < vlen; i++, p++) {
+				if (i > 0)
+					btf_dump_printf(d, ", ");
+
+				/* last arg of type void is vararg */
+				if (i == vlen - 1 && p->type == 0) {
+					btf_dump_printf(d, "...");
+					break;
+				}
+
+				name = btf_name_of(d, p->name_off);
+				btf_dump_emit_type_decl(d, p->type, name, lvl);
+			}
+
+			btf_dump_printf(d, ")");
+			return;
+		}
+		default:
+			pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
+				   kind, id);
+			return;
+		}
+
+		last_was_ptr = kind == BTF_KIND_PTR;
+	}
+
+	btf_dump_emit_name(d, fname, last_was_ptr);
+}
+
+/* return number of duplicates (occurrences) of a given name */
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+				 const char *orig_name)
+{
+	size_t dup_cnt = 0;
+
+	hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+	dup_cnt++;
+	hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
+
+	return dup_cnt;
+}
+
+static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
+					 struct hashmap *name_map)
+{
+	struct btf_dump_type_aux_state *s = &d->type_states[id];
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+	const char *orig_name = btf_name_of(d, t->name_off);
+	const char **cached_name = &d->cached_names[id];
+	size_t dup_cnt;
+
+	if (t->name_off == 0)
+		return "";
+
+	if (s->name_resolved)
+		return *cached_name ? *cached_name : orig_name;
+
+	dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
+	if (dup_cnt > 1) {
+		const size_t max_len = 256;
+		char new_name[max_len];
+
+		snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt);
+		*cached_name = strdup(new_name);
+	}
+
+	s->name_resolved = 1;
+	return *cached_name ? *cached_name : orig_name;
+}
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id)
+{
+	return btf_dump_resolve_name(d, id, d->type_names);
+}
+
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
+{
+	return btf_dump_resolve_name(d, id, d->ident_names);
+}
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
new file mode 100644
index 000000000000..6122272943e6
--- /dev/null
+++ b/tools/lib/bpf/hashmap.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	entry->next = *pprev;
+	*pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	*pprev = entry->next;
+	entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx)
+{
+	map->hash_fn = hash_fn;
+	map->equal_fn = equal_fn;
+	map->ctx = ctx;
+
+	map->buckets = NULL;
+	map->cap = 0;
+	map->cap_bits = 0;
+	map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx)
+{
+	struct hashmap *map = malloc(sizeof(struct hashmap));
+
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+	hashmap__init(map, hash_fn, equal_fn, ctx);
+	return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+	free(map->buckets);
+	map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+	if (!map)
+		return;
+
+	hashmap__clear(map);
+	free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+	return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+	return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+	/* grow if empty or more than 75% filled */
+	return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+	struct hashmap_entry **new_buckets;
+	struct hashmap_entry *cur, *tmp;
+	size_t new_cap_bits, new_cap;
+	size_t h;
+	int bkt;
+
+	new_cap_bits = map->cap_bits + 1;
+	if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+		new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+	new_cap = 1UL << new_cap_bits;
+	new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+	if (!new_buckets)
+		return -ENOMEM;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+		hashmap_add_entry(&new_buckets[h], cur);
+	}
+
+	map->cap = new_cap;
+	map->cap_bits = new_cap_bits;
+	free(map->buckets);
+	map->buckets = new_buckets;
+
+	return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+			       const void *key, size_t hash,
+			       struct hashmap_entry ***pprev,
+			       struct hashmap_entry **entry)
+{
+	struct hashmap_entry *cur, **prev_ptr;
+
+	if (!map->buckets)
+		return false;
+
+	for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+	     cur;
+	     prev_ptr = &cur->next, cur = cur->next) {
+		if (map->equal_fn(cur->key, key, map->ctx)) {
+			if (pprev)
+				*pprev = prev_ptr;
+			*entry = cur;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+	int err;
+
+	if (old_key)
+		*old_key = NULL;
+	if (old_value)
+		*old_value = NULL;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (strategy != HASHMAP_APPEND &&
+	    hashmap_find_entry(map, key, h, NULL, &entry)) {
+		if (old_key)
+			*old_key = entry->key;
+		if (old_value)
+			*old_value = entry->value;
+
+		if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+			entry->key = key;
+			entry->value = value;
+			return 0;
+		} else if (strategy == HASHMAP_ADD) {
+			return -EEXIST;
+		}
+	}
+
+	if (strategy == HASHMAP_UPDATE)
+		return -ENOENT;
+
+	if (hashmap_needs_to_grow(map)) {
+		err = hashmap_grow(map);
+		if (err)
+			return err;
+		h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	}
+
+	entry = malloc(sizeof(struct hashmap_entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->key = key;
+	entry->value = value;
+	hashmap_add_entry(&map->buckets[h], entry);
+	map->sz++;
+
+	return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, NULL, &entry))
+		return false;
+
+	if (value)
+		*value = entry->value;
+	return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value)
+{
+	struct hashmap_entry **pprev, *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+		return false;
+
+	if (old_key)
+		*old_key = entry->key;
+	if (old_value)
+		*old_value = entry->value;
+
+	hashmap_del_entry(pprev, entry);
+	free(entry);
+	map->sz--;
+
+	return true;
+}
+
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
new file mode 100644
index 000000000000..03748a742146
--- /dev/null
+++ b/tools/lib/bpf/hashmap.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include "libbpf_internal.h"
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+	/* shuffle bits and return requested number of upper bits */
+	return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+	const void *key;
+	void *value;
+	struct hashmap_entry *next;
+};
+
+struct hashmap {
+	hashmap_hash_fn hash_fn;
+	hashmap_equal_fn equal_fn;
+	void *ctx;
+
+	struct hashmap_entry **buckets;
+	size_t cap;
+	size_t cap_bits;
+	size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) {	\
+	.hash_fn = (hash_fn),			\
+	.equal_fn = (equal_fn),			\
+	.ctx = (ctx),				\
+	.buckets = NULL,			\
+	.cap = 0,				\
+	.cap_bits = 0,				\
+	.sz = 0,				\
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ *   update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ *   nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ *   This turns hashmap into a multimap by allowing multiple values to be
+ *   associated with the same key. Most useful read API for such hashmap is
+ *   hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ *   used, it will return last inserted key/value entry (first in a bucket
+ *   chain).
+ */
+enum hashmap_insert_strategy {
+	HASHMAP_ADD,
+	HASHMAP_SET,
+	HASHMAP_UPDATE,
+	HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+			       const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+			       const void *key, void *value,
+			       const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_SET,
+			       old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+				  const void *key, void *value,
+				  const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+			       old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+				  const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt)				    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt)		    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt];				    \
+		     cur && ({tmp = cur->next; true; });		    \
+		     cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key)			    \
+	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+					     map->cap_bits);		    \
+		     map->buckets ? map->buckets[bkt] : NULL; });	    \
+	     cur;							    \
+	     cur = cur->next)						    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
+	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+					     map->cap_bits);		    \
+		     cur = map->buckets ? map->buckets[bkt] : NULL; });	    \
+	     cur && ({ tmp = cur->next; true; });			    \
+	     cur = tmp)							    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 151f7ac1882e..ed07789b3e62 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -32,6 +32,9 @@
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -188,6 +191,7 @@ struct bpf_program {
 	void *line_info;
 	__u32 line_info_rec_size;
 	__u32 line_info_cnt;
+	__u32 prog_flags;
 };
 
 enum libbpf_map_type {
@@ -206,7 +210,8 @@ static const char * const libbpf_type_to_btf_name[] = {
 struct bpf_map {
 	int fd;
 	char *name;
-	size_t offset;
+	int sec_idx;
+	size_t sec_offset;
 	int map_ifindex;
 	int inner_map_fd;
 	struct bpf_map_def def;
@@ -233,6 +238,7 @@ struct bpf_object {
 	size_t nr_programs;
 	struct bpf_map *maps;
 	size_t nr_maps;
+	size_t maps_cap;
 	struct bpf_secdata sections;
 
 	bool loaded;
@@ -259,6 +265,7 @@ struct bpf_object {
 		} *reloc;
 		int nr_reloc;
 		int maps_shndx;
+		int btf_maps_shndx;
 		int text_shndx;
 		int data_shndx;
 		int rodata_shndx;
@@ -348,8 +355,11 @@ static int
 bpf_program__init(void *data, size_t size, char *section_name, int idx,
 		  struct bpf_program *prog)
 {
-	if (size < sizeof(struct bpf_insn)) {
-		pr_warning("corrupted section '%s'\n", section_name);
+	const size_t bpf_insn_sz = sizeof(struct bpf_insn);
+
+	if (size == 0 || size % bpf_insn_sz) {
+		pr_warning("corrupted section '%s', size: %zu\n",
+			   section_name, size);
 		return -EINVAL;
 	}
 
@@ -375,9 +385,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 			   section_name);
 		goto errout;
 	}
-	prog->insns_cnt = size / sizeof(struct bpf_insn);
-	memcpy(prog->insns, data,
-	       prog->insns_cnt * sizeof(struct bpf_insn));
+	prog->insns_cnt = size / bpf_insn_sz;
+	memcpy(prog->insns, data, size);
 	prog->idx = idx;
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
@@ -494,15 +503,14 @@ static struct bpf_object *bpf_object__new(const char *path,
 
 	strcpy(obj->path, path);
 	/* Using basename() GNU version which doesn't modify arg. */
-	strncpy(obj->name, basename((void *)path),
-		sizeof(obj->name) - 1);
+	strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1);
 	end = strchr(obj->name, '.');
 	if (end)
 		*end = 0;
 
 	obj->efile.fd = -1;
 	/*
-	 * Caller of this function should also calls
+	 * Caller of this function should also call
 	 * bpf_object__elf_finish() after data collection to return
 	 * obj_buf to user. If not, we should duplicate the buffer to
 	 * avoid user freeing them before elf finish.
@@ -510,6 +518,7 @@ static struct bpf_object *bpf_object__new(const char *path,
 	obj->efile.obj_buf = obj_buf;
 	obj->efile.obj_buf_sz = obj_buf_sz;
 	obj->efile.maps_shndx = -1;
+	obj->efile.btf_maps_shndx = -1;
 	obj->efile.data_shndx = -1;
 	obj->efile.rodata_shndx = -1;
 	obj->efile.bss_shndx = -1;
@@ -562,38 +571,35 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 	} else {
 		obj->efile.fd = open(obj->path, O_RDONLY);
 		if (obj->efile.fd < 0) {
-			char errmsg[STRERR_BUFSIZE];
-			char *cp = libbpf_strerror_r(errno, errmsg,
-						     sizeof(errmsg));
+			char errmsg[STRERR_BUFSIZE], *cp;
 
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 			pr_warning("failed to open %s: %s\n", obj->path, cp);
-			return -errno;
+			return err;
 		}
 
 		obj->efile.elf = elf_begin(obj->efile.fd,
-				LIBBPF_ELF_C_READ_MMAP,
-				NULL);
+					   LIBBPF_ELF_C_READ_MMAP, NULL);
 	}
 
 	if (!obj->efile.elf) {
-		pr_warning("failed to open %s as ELF file\n",
-				obj->path);
+		pr_warning("failed to open %s as ELF file\n", obj->path);
 		err = -LIBBPF_ERRNO__LIBELF;
 		goto errout;
 	}
 
 	if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
-		pr_warning("failed to get EHDR from %s\n",
-				obj->path);
+		pr_warning("failed to get EHDR from %s\n", obj->path);
 		err = -LIBBPF_ERRNO__FORMAT;
 		goto errout;
 	}
 	ep = &obj->efile.ehdr;
 
 	/* Old LLVM set e_machine to EM_NONE */
-	if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) {
-		pr_warning("%s is not an eBPF object file\n",
-			obj->path);
+	if (ep->e_type != ET_REL ||
+	    (ep->e_machine && ep->e_machine != EM_BPF)) {
+		pr_warning("%s is not an eBPF object file\n", obj->path);
 		err = -LIBBPF_ERRNO__FORMAT;
 		goto errout;
 	}
@@ -604,47 +610,31 @@ errout:
 	return err;
 }
 
-static int
-bpf_object__check_endianness(struct bpf_object *obj)
+static int bpf_object__check_endianness(struct bpf_object *obj)
 {
-	static unsigned int const endian = 1;
-
-	switch (obj->efile.ehdr.e_ident[EI_DATA]) {
-	case ELFDATA2LSB:
-		/* We are big endian, BPF obj is little endian. */
-		if (*(unsigned char const *)&endian != 1)
-			goto mismatch;
-		break;
-
-	case ELFDATA2MSB:
-		/* We are little endian, BPF obj is big endian. */
-		if (*(unsigned char const *)&endian != 0)
-			goto mismatch;
-		break;
-	default:
-		return -LIBBPF_ERRNO__ENDIAN;
-	}
-
-	return 0;
-
-mismatch:
-	pr_warning("Error: endianness mismatch.\n");
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+		return 0;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
+		return 0;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+	pr_warning("endianness mismatch.\n");
 	return -LIBBPF_ERRNO__ENDIAN;
 }
 
 static int
-bpf_object__init_license(struct bpf_object *obj,
-			 void *data, size_t size)
+bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
 {
-	memcpy(obj->license, data,
-	       min(size, sizeof(obj->license) - 1));
+	memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
 	pr_debug("license of %s is %s\n", obj->path, obj->license);
 	return 0;
 }
 
 static int
-bpf_object__init_kversion(struct bpf_object *obj,
-			  void *data, size_t size)
+bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
 {
 	__u32 kver;
 
@@ -654,8 +644,7 @@ bpf_object__init_kversion(struct bpf_object *obj,
 	}
 	memcpy(&kver, data, sizeof(kver));
 	obj->kern_version = kver;
-	pr_debug("kernel version of %s is %x\n", obj->path,
-		 obj->kern_version);
+	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
 	return 0;
 }
 
@@ -664,7 +653,9 @@ static int compare_bpf_map(const void *_a, const void *_b)
 	const struct bpf_map *a = _a;
 	const struct bpf_map *b = _b;
 
-	return a->offset - b->offset;
+	if (a->sec_idx != b->sec_idx)
+		return a->sec_idx - b->sec_idx;
+	return a->sec_offset - b->sec_offset;
 }
 
 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
@@ -781,24 +772,55 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
 	return -ENOENT;
 }
 
-static bool bpf_object__has_maps(const struct bpf_object *obj)
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 {
-	return obj->efile.maps_shndx >= 0 ||
-	       obj->efile.data_shndx >= 0 ||
-	       obj->efile.rodata_shndx >= 0 ||
-	       obj->efile.bss_shndx >= 0;
+	struct bpf_map *new_maps;
+	size_t new_cap;
+	int i;
+
+	if (obj->nr_maps < obj->maps_cap)
+		return &obj->maps[obj->nr_maps++];
+
+	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
+	new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
+	if (!new_maps) {
+		pr_warning("alloc maps for object failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	obj->maps_cap = new_cap;
+	obj->maps = new_maps;
+
+	/* zero out new maps */
+	memset(obj->maps + obj->nr_maps, 0,
+	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
+	/*
+	 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
+	 * when failure (zclose won't close negative fd)).
+	 */
+	for (i = obj->nr_maps; i < obj->maps_cap; i++) {
+		obj->maps[i].fd = -1;
+		obj->maps[i].inner_map_fd = -1;
+	}
+
+	return &obj->maps[obj->nr_maps++];
 }
 
 static int
-bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
-			      enum libbpf_map_type type, Elf_Data *data,
-			      void **data_buff)
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
+			      int sec_idx, Elf_Data *data, void **data_buff)
 {
-	struct bpf_map_def *def = &map->def;
 	char map_name[BPF_OBJ_NAME_LEN];
+	struct bpf_map_def *def;
+	struct bpf_map *map;
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	map->libbpf_type = type;
-	map->offset = ~(typeof(map->offset))0;
+	map->sec_idx = sec_idx;
+	map->sec_offset = 0;
 	snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
 		 libbpf_type_to_btf_name[type]);
 	map->name = strdup(map_name);
@@ -806,13 +828,15 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 		pr_warning("failed to alloc map name\n");
 		return -ENOMEM;
 	}
+	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n",
+		 map_name, map->sec_idx, map->sec_offset);
 
+	def = &map->def;
 	def->type = BPF_MAP_TYPE_ARRAY;
 	def->key_size = sizeof(int);
 	def->value_size = data->d_size;
 	def->max_entries = 1;
-	def->map_flags = type == LIBBPF_MAP_RODATA ?
-			 BPF_F_RDONLY_PROG : 0;
+	def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
 	if (data_buff) {
 		*data_buff = malloc(data->d_size);
 		if (!*data_buff) {
@@ -827,30 +851,61 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 	return 0;
 }
 
-static int
-bpf_object__init_maps(struct bpf_object *obj, int flags)
+static int bpf_object__init_global_data_maps(struct bpf_object *obj)
+{
+	int err;
+
+	if (!obj->caps.global_data)
+		return 0;
+	/*
+	 * Populate obj->maps with libbpf internal maps.
+	 */
+	if (obj->efile.data_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
+						    obj->efile.data_shndx,
+						    obj->efile.data,
+						    &obj->sections.data);
+		if (err)
+			return err;
+	}
+	if (obj->efile.rodata_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
+						    obj->efile.rodata_shndx,
+						    obj->efile.rodata,
+						    &obj->sections.rodata);
+		if (err)
+			return err;
+	}
+	if (obj->efile.bss_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
+						    obj->efile.bss_shndx,
+						    obj->efile.bss, NULL);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 {
-	int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
-	bool strict = !(flags & MAPS_RELAX_COMPAT);
 	Elf_Data *symbols = obj->efile.symbols;
+	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
 	Elf_Data *data = NULL;
-	int ret = 0;
+	Elf_Scn *scn;
+
+	if (obj->efile.maps_shndx < 0)
+		return 0;
 
 	if (!symbols)
 		return -EINVAL;
-	nr_syms = symbols->d_size / sizeof(GElf_Sym);
 
-	if (obj->efile.maps_shndx >= 0) {
-		Elf_Scn *scn = elf_getscn(obj->efile.elf,
-					  obj->efile.maps_shndx);
-
-		if (scn)
-			data = elf_getdata(scn, NULL);
-		if (!scn || !data) {
-			pr_warning("failed to get Elf_Data from map section %d\n",
-				   obj->efile.maps_shndx);
-			return -EINVAL;
-		}
+	scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
+	if (scn)
+		data = elf_getdata(scn, NULL);
+	if (!scn || !data) {
+		pr_warning("failed to get Elf_Data from map section %d\n",
+			   obj->efile.maps_shndx);
+		return -EINVAL;
 	}
 
 	/*
@@ -860,16 +915,8 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 	 *
 	 * TODO: Detect array of map and report error.
 	 */
-	if (obj->caps.global_data) {
-		if (obj->efile.data_shndx >= 0)
-			nr_maps_glob++;
-		if (obj->efile.rodata_shndx >= 0)
-			nr_maps_glob++;
-		if (obj->efile.bss_shndx >= 0)
-			nr_maps_glob++;
-	}
-
-	for (i = 0; data && i < nr_syms; i++) {
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
+	for (i = 0; i < nr_syms; i++) {
 		GElf_Sym sym;
 
 		if (!gelf_getsym(symbols, i, &sym))
@@ -878,74 +925,59 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 			continue;
 		nr_maps++;
 	}
-
-	if (!nr_maps && !nr_maps_glob)
-		return 0;
-
 	/* Assume equally sized map definitions */
-	if (data) {
-		pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
-			 nr_maps, data->d_size);
-
-		map_def_sz = data->d_size / nr_maps;
-		if (!data->d_size || (data->d_size % nr_maps) != 0) {
-			pr_warning("unable to determine map definition size "
-				   "section %s, %d maps in %zd bytes\n",
-				   obj->path, nr_maps, data->d_size);
-			return -EINVAL;
-		}
-	}
-
-	nr_maps += nr_maps_glob;
-	obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
-	if (!obj->maps) {
-		pr_warning("alloc maps for object failed\n");
-		return -ENOMEM;
-	}
-	obj->nr_maps = nr_maps;
-
-	for (i = 0; i < nr_maps; i++) {
-		/*
-		 * fill all fd with -1 so won't close incorrect
-		 * fd (fd=0 is stdin) when failure (zclose won't close
-		 * negative fd)).
-		 */
-		obj->maps[i].fd = -1;
-		obj->maps[i].inner_map_fd = -1;
+	pr_debug("maps in %s: %d maps in %zd bytes\n",
+		 obj->path, nr_maps, data->d_size);
+
+	map_def_sz = data->d_size / nr_maps;
+	if (!data->d_size || (data->d_size % nr_maps) != 0) {
+		pr_warning("unable to determine map definition size "
+			   "section %s, %d maps in %zd bytes\n",
+			   obj->path, nr_maps, data->d_size);
+		return -EINVAL;
 	}
 
-	/*
-	 * Fill obj->maps using data in "maps" section.
-	 */
-	for (i = 0, map_idx = 0; data && i < nr_syms; i++) {
+	/* Fill obj->maps using data in "maps" section.  */
+	for (i = 0; i < nr_syms; i++) {
 		GElf_Sym sym;
 		const char *map_name;
 		struct bpf_map_def *def;
+		struct bpf_map *map;
 
 		if (!gelf_getsym(symbols, i, &sym))
 			continue;
 		if (sym.st_shndx != obj->efile.maps_shndx)
 			continue;
 
-		map_name = elf_strptr(obj->efile.elf,
-				      obj->efile.strtabidx,
+		map = bpf_object__add_map(obj);
+		if (IS_ERR(map))
+			return PTR_ERR(map);
+
+		map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
 				      sym.st_name);
+		if (!map_name) {
+			pr_warning("failed to get map #%d name sym string for obj %s\n",
+				   i, obj->path);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
 
-		obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
-		obj->maps[map_idx].offset = sym.st_value;
+		map->libbpf_type = LIBBPF_MAP_UNSPEC;
+		map->sec_idx = sym.st_shndx;
+		map->sec_offset = sym.st_value;
+		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
+			 map_name, map->sec_idx, map->sec_offset);
 		if (sym.st_value + map_def_sz > data->d_size) {
 			pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
 				   obj->path, map_name);
 			return -EINVAL;
 		}
 
-		obj->maps[map_idx].name = strdup(map_name);
-		if (!obj->maps[map_idx].name) {
+		map->name = strdup(map_name);
+		if (!map->name) {
 			pr_warning("failed to alloc map name\n");
 			return -ENOMEM;
 		}
-		pr_debug("map %d is \"%s\"\n", map_idx,
-			 obj->maps[map_idx].name);
+		pr_debug("map %d is \"%s\"\n", i, map->name);
 		def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
 		/*
 		 * If the definition of the map in the object file fits in
@@ -954,7 +986,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 		 * calloc above.
 		 */
 		if (map_def_sz <= sizeof(struct bpf_map_def)) {
-			memcpy(&obj->maps[map_idx].def, def, map_def_sz);
+			memcpy(&map->def, def, map_def_sz);
 		} else {
 			/*
 			 * Here the map structure being read is bigger than what
@@ -974,37 +1006,338 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 						return -EINVAL;
 				}
 			}
-			memcpy(&obj->maps[map_idx].def, def,
-			       sizeof(struct bpf_map_def));
+			memcpy(&map->def, def, sizeof(struct bpf_map_def));
 		}
-		map_idx++;
 	}
+	return 0;
+}
 
-	if (!obj->caps.global_data)
-		goto finalize;
+static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
+						     __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
 
-	/*
-	 * Populate rest of obj->maps with libbpf internal maps.
-	 */
-	if (obj->efile.data_shndx >= 0)
-		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
-						    LIBBPF_MAP_DATA,
-						    obj->efile.data,
-						    &obj->sections.data);
-	if (!ret && obj->efile.rodata_shndx >= 0)
-		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
-						    LIBBPF_MAP_RODATA,
-						    obj->efile.rodata,
-						    &obj->sections.rodata);
-	if (!ret && obj->efile.bss_shndx >= 0)
-		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
-						    LIBBPF_MAP_BSS,
-						    obj->efile.bss, NULL);
-finalize:
-	if (!ret)
+	while (true) {
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_TYPEDEF:
+			t = btf__type_by_id(btf, t->type);
+			break;
+		default:
+			return t;
+		}
+	}
+}
+
+/*
+ * Fetch integer attribute of BTF map definition. Such attributes are
+ * represented using a pointer to an array, in which dimensionality of array
+ * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
+ * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
+ * type definition, while using only sizeof(void *) space in ELF data section.
+ */
+static bool get_map_field_int(const char *map_name, const struct btf *btf,
+			      const struct btf_type *def,
+			      const struct btf_member *m, __u32 *res) {
+	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
+	const char *name = btf__name_by_offset(btf, m->name_off);
+	const struct btf_array *arr_info;
+	const struct btf_type *arr_t;
+
+	if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+		pr_warning("map '%s': attr '%s': expected PTR, got %u.\n",
+			   map_name, name, BTF_INFO_KIND(t->info));
+		return false;
+	}
+
+	arr_t = btf__type_by_id(btf, t->type);
+	if (!arr_t) {
+		pr_warning("map '%s': attr '%s': type [%u] not found.\n",
+			   map_name, name, t->type);
+		return false;
+	}
+	if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) {
+		pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n",
+			   map_name, name, BTF_INFO_KIND(arr_t->info));
+		return false;
+	}
+	arr_info = (const void *)(arr_t + 1);
+	*res = arr_info->nelems;
+	return true;
+}
+
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+					 const struct btf_type *sec,
+					 int var_idx, int sec_idx,
+					 const Elf_Data *data, bool strict)
+{
+	const struct btf_type *var, *def, *t;
+	const struct btf_var_secinfo *vi;
+	const struct btf_var *var_extra;
+	const struct btf_member *m;
+	const char *map_name;
+	struct bpf_map *map;
+	int vlen, i;
+
+	vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx;
+	var = btf__type_by_id(obj->btf, vi->type);
+	var_extra = (const void *)(var + 1);
+	map_name = btf__name_by_offset(obj->btf, var->name_off);
+	vlen = BTF_INFO_VLEN(var->info);
+
+	if (map_name == NULL || map_name[0] == '\0') {
+		pr_warning("map #%d: empty name.\n", var_idx);
+		return -EINVAL;
+	}
+	if ((__u64)vi->offset + vi->size > data->d_size) {
+		pr_warning("map '%s' BTF data is corrupted.\n", map_name);
+		return -EINVAL;
+	}
+	if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) {
+		pr_warning("map '%s': unexpected var kind %u.\n",
+			   map_name, BTF_INFO_KIND(var->info));
+		return -EINVAL;
+	}
+	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+	    var_extra->linkage != BTF_VAR_STATIC) {
+		pr_warning("map '%s': unsupported var linkage %u.\n",
+			   map_name, var_extra->linkage);
+		return -EOPNOTSUPP;
+	}
+
+	def = skip_mods_and_typedefs(obj->btf, var->type);
+	if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
+		pr_warning("map '%s': unexpected def kind %u.\n",
+			   map_name, BTF_INFO_KIND(var->info));
+		return -EINVAL;
+	}
+	if (def->size > vi->size) {
+		pr_warning("map '%s': invalid def size.\n", map_name);
+		return -EINVAL;
+	}
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	map->name = strdup(map_name);
+	if (!map->name) {
+		pr_warning("map '%s': failed to alloc map name.\n", map_name);
+		return -ENOMEM;
+	}
+	map->libbpf_type = LIBBPF_MAP_UNSPEC;
+	map->def.type = BPF_MAP_TYPE_UNSPEC;
+	map->sec_idx = sec_idx;
+	map->sec_offset = vi->offset;
+	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+		 map_name, map->sec_idx, map->sec_offset);
+
+	vlen = BTF_INFO_VLEN(def->info);
+	m = (const void *)(def + 1);
+	for (i = 0; i < vlen; i++, m++) {
+		const char *name = btf__name_by_offset(obj->btf, m->name_off);
+
+		if (!name) {
+			pr_warning("map '%s': invalid field #%d.\n",
+				   map_name, i);
+			return -EINVAL;
+		}
+		if (strcmp(name, "type") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &map->def.type))
+				return -EINVAL;
+			pr_debug("map '%s': found type = %u.\n",
+				 map_name, map->def.type);
+		} else if (strcmp(name, "max_entries") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &map->def.max_entries))
+				return -EINVAL;
+			pr_debug("map '%s': found max_entries = %u.\n",
+				 map_name, map->def.max_entries);
+		} else if (strcmp(name, "map_flags") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &map->def.map_flags))
+				return -EINVAL;
+			pr_debug("map '%s': found map_flags = %u.\n",
+				 map_name, map->def.map_flags);
+		} else if (strcmp(name, "key_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found key_size = %u.\n",
+				 map_name, sz);
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warning("map '%s': conflicting key size %u != %u.\n",
+					   map_name, map->def.key_size, sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+		} else if (strcmp(name, "key") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warning("map '%s': key type [%d] not found.\n",
+					   map_name, m->type);
+				return -EINVAL;
+			}
+			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+				pr_warning("map '%s': key spec is not PTR: %u.\n",
+					   map_name, BTF_INFO_KIND(t->info));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n",
+					   map_name, t->type, sz);
+				return sz;
+			}
+			pr_debug("map '%s': found key [%u], sz = %lld.\n",
+				 map_name, t->type, sz);
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warning("map '%s': conflicting key size %u != %lld.\n",
+					   map_name, map->def.key_size, sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+			map->btf_key_type_id = t->type;
+		} else if (strcmp(name, "value_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found value_size = %u.\n",
+				 map_name, sz);
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warning("map '%s': conflicting value size %u != %u.\n",
+					   map_name, map->def.value_size, sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+		} else if (strcmp(name, "value") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warning("map '%s': value type [%d] not found.\n",
+					   map_name, m->type);
+				return -EINVAL;
+			}
+			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+				pr_warning("map '%s': value spec is not PTR: %u.\n",
+					   map_name, BTF_INFO_KIND(t->info));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n",
+					   map_name, t->type, sz);
+				return sz;
+			}
+			pr_debug("map '%s': found value [%u], sz = %lld.\n",
+				 map_name, t->type, sz);
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warning("map '%s': conflicting value size %u != %lld.\n",
+					   map_name, map->def.value_size, sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+			map->btf_value_type_id = t->type;
+		} else {
+			if (strict) {
+				pr_warning("map '%s': unknown field '%s'.\n",
+					   map_name, name);
+				return -ENOTSUP;
+			}
+			pr_debug("map '%s': ignoring unknown field '%s'.\n",
+				 map_name, name);
+		}
+	}
+
+	if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
+		pr_warning("map '%s': map type isn't specified.\n", map_name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
+{
+	const struct btf_type *sec = NULL;
+	int nr_types, i, vlen, err;
+	const struct btf_type *t;
+	const char *name;
+	Elf_Data *data;
+	Elf_Scn *scn;
+
+	if (obj->efile.btf_maps_shndx < 0)
+		return 0;
+
+	scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
+	if (scn)
+		data = elf_getdata(scn, NULL);
+	if (!scn || !data) {
+		pr_warning("failed to get Elf_Data from map section %d (%s)\n",
+			   obj->efile.maps_shndx, MAPS_ELF_SEC);
+		return -EINVAL;
+	}
+
+	nr_types = btf__get_nr_types(obj->btf);
+	for (i = 1; i <= nr_types; i++) {
+		t = btf__type_by_id(obj->btf, i);
+		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
+			continue;
+		name = btf__name_by_offset(obj->btf, t->name_off);
+		if (strcmp(name, MAPS_ELF_SEC) == 0) {
+			sec = t;
+			break;
+		}
+	}
+
+	if (!sec) {
+		pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
+		return -ENOENT;
+	}
+
+	vlen = BTF_INFO_VLEN(sec->info);
+	for (i = 0; i < vlen; i++) {
+		err = bpf_object__init_user_btf_map(obj, sec, i,
+						    obj->efile.btf_maps_shndx,
+						    data, strict);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int bpf_object__init_maps(struct bpf_object *obj, int flags)
+{
+	bool strict = !(flags & MAPS_RELAX_COMPAT);
+	int err;
+
+	err = bpf_object__init_user_maps(obj, strict);
+	if (err)
+		return err;
+
+	err = bpf_object__init_user_btf_maps(obj, strict);
+	if (err)
+		return err;
+
+	err = bpf_object__init_global_data_maps(obj);
+	if (err)
+		return err;
+
+	if (obj->nr_maps) {
 		qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
 		      compare_bpf_map);
-	return ret;
+	}
+	return 0;
 }
 
 static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -1093,6 +1426,86 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
 	}
 }
 
+static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
+{
+	return obj->efile.btf_maps_shndx >= 0;
+}
+
+static int bpf_object__init_btf(struct bpf_object *obj,
+				Elf_Data *btf_data,
+				Elf_Data *btf_ext_data)
+{
+	bool btf_required = bpf_object__is_btf_mandatory(obj);
+	int err = 0;
+
+	if (btf_data) {
+		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+		if (IS_ERR(obj->btf)) {
+			pr_warning("Error loading ELF section %s: %d.\n",
+				   BTF_ELF_SEC, err);
+			goto out;
+		}
+		err = btf__finalize_data(obj, obj->btf);
+		if (err) {
+			pr_warning("Error finalizing %s: %d.\n",
+				   BTF_ELF_SEC, err);
+			goto out;
+		}
+	}
+	if (btf_ext_data) {
+		if (!obj->btf) {
+			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
+				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
+			goto out;
+		}
+		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
+					    btf_ext_data->d_size);
+		if (IS_ERR(obj->btf_ext)) {
+			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+				   BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+			obj->btf_ext = NULL;
+			goto out;
+		}
+	}
+out:
+	if (err || IS_ERR(obj->btf)) {
+		if (btf_required)
+			err = err ? : PTR_ERR(obj->btf);
+		else
+			err = 0;
+		if (!IS_ERR_OR_NULL(obj->btf))
+			btf__free(obj->btf);
+		obj->btf = NULL;
+	}
+	if (btf_required && !obj->btf) {
+		pr_warning("BTF is required, but is missing or corrupted.\n");
+		return err == 0 ? -ENOENT : err;
+	}
+	return 0;
+}
+
+static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
+{
+	int err = 0;
+
+	if (!obj->btf)
+		return 0;
+
+	bpf_object__sanitize_btf(obj);
+	bpf_object__sanitize_btf_ext(obj);
+
+	err = btf__load(obj->btf);
+	if (err) {
+		pr_warning("Error loading %s into kernel: %d.\n",
+			   BTF_ELF_SEC, err);
+		btf__free(obj->btf);
+		obj->btf = NULL;
+		if (bpf_object__is_btf_mandatory(obj))
+			return err;
+	}
+	return 0;
+}
+
 static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 {
 	Elf *elf = obj->efile.elf;
@@ -1104,8 +1517,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 
 	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
 	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
-		pr_warning("failed to get e_shstrndx from %s\n",
-			   obj->path);
+		pr_warning("failed to get e_shstrndx from %s\n", obj->path);
 		return -LIBBPF_ERRNO__FORMAT;
 	}
 
@@ -1118,24 +1530,21 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 		if (gelf_getshdr(scn, &sh) != &sh) {
 			pr_warning("failed to get section(%d) header from %s\n",
 				   idx, obj->path);
-			err = -LIBBPF_ERRNO__FORMAT;
-			goto out;
+			return -LIBBPF_ERRNO__FORMAT;
 		}
 
 		name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
 		if (!name) {
 			pr_warning("failed to get section(%d) name from %s\n",
 				   idx, obj->path);
-			err = -LIBBPF_ERRNO__FORMAT;
-			goto out;
+			return -LIBBPF_ERRNO__FORMAT;
 		}
 
 		data = elf_getdata(scn, 0);
 		if (!data) {
 			pr_warning("failed to get section(%d) data from %s(%s)\n",
 				   idx, name, obj->path);
-			err = -LIBBPF_ERRNO__FORMAT;
-			goto out;
+			return -LIBBPF_ERRNO__FORMAT;
 		}
 		pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
 			 idx, name, (unsigned long)data->d_size,
@@ -1146,12 +1555,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			err = bpf_object__init_license(obj,
 						       data->d_buf,
 						       data->d_size);
+			if (err)
+				return err;
 		} else if (strcmp(name, "version") == 0) {
 			err = bpf_object__init_kversion(obj,
 							data->d_buf,
 							data->d_size);
+			if (err)
+				return err;
 		} else if (strcmp(name, "maps") == 0) {
 			obj->efile.maps_shndx = idx;
+		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
+			obj->efile.btf_maps_shndx = idx;
 		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
 			btf_data = data;
 		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
@@ -1160,11 +1575,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			if (obj->efile.symbols) {
 				pr_warning("bpf: multiple SYMTAB in %s\n",
 					   obj->path);
-				err = -LIBBPF_ERRNO__FORMAT;
-			} else {
-				obj->efile.symbols = data;
-				obj->efile.strtabidx = sh.sh_link;
+				return -LIBBPF_ERRNO__FORMAT;
 			}
+			obj->efile.symbols = data;
+			obj->efile.strtabidx = sh.sh_link;
 		} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
 			if (sh.sh_flags & SHF_EXECINSTR) {
 				if (strcmp(name, ".text") == 0)
@@ -1178,6 +1592,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 
 					pr_warning("failed to alloc program %s (%s): %s",
 						   name, obj->path, cp);
+					return err;
 				}
 			} else if (strcmp(name, ".data") == 0) {
 				obj->efile.data = data;
@@ -1189,8 +1604,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 				pr_debug("skip section(%d) %s\n", idx, name);
 			}
 		} else if (sh.sh_type == SHT_REL) {
+			int nr_reloc = obj->efile.nr_reloc;
 			void *reloc = obj->efile.reloc;
-			int nr_reloc = obj->efile.nr_reloc + 1;
 			int sec = sh.sh_info; /* points to other section */
 
 			/* Only do relo for section with exec instructions */
@@ -1200,79 +1615,37 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 				continue;
 			}
 
-			reloc = reallocarray(reloc, nr_reloc,
+			reloc = reallocarray(reloc, nr_reloc + 1,
 					     sizeof(*obj->efile.reloc));
 			if (!reloc) {
 				pr_warning("realloc failed\n");
-				err = -ENOMEM;
-			} else {
-				int n = nr_reloc - 1;
+				return -ENOMEM;
+			}
 
-				obj->efile.reloc = reloc;
-				obj->efile.nr_reloc = nr_reloc;
+			obj->efile.reloc = reloc;
+			obj->efile.nr_reloc++;
 
-				obj->efile.reloc[n].shdr = sh;
-				obj->efile.reloc[n].data = data;
-			}
+			obj->efile.reloc[nr_reloc].shdr = sh;
+			obj->efile.reloc[nr_reloc].data = data;
 		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
 			obj->efile.bss = data;
 			obj->efile.bss_shndx = idx;
 		} else {
 			pr_debug("skip section(%d) %s\n", idx, name);
 		}
-		if (err)
-			goto out;
 	}
 
 	if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
 		pr_warning("Corrupted ELF file: index of strtab invalid\n");
-		return LIBBPF_ERRNO__FORMAT;
-	}
-	if (btf_data) {
-		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
-		if (IS_ERR(obj->btf)) {
-			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
-				   BTF_ELF_SEC, PTR_ERR(obj->btf));
-			obj->btf = NULL;
-		} else {
-			err = btf__finalize_data(obj, obj->btf);
-			if (!err) {
-				bpf_object__sanitize_btf(obj);
-				err = btf__load(obj->btf);
-			}
-			if (err) {
-				pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n",
-					   BTF_ELF_SEC, err);
-				btf__free(obj->btf);
-				obj->btf = NULL;
-				err = 0;
-			}
-		}
-	}
-	if (btf_ext_data) {
-		if (!obj->btf) {
-			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
-				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
-		} else {
-			obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
-						    btf_ext_data->d_size);
-			if (IS_ERR(obj->btf_ext)) {
-				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
-					   BTF_EXT_ELF_SEC,
-					   PTR_ERR(obj->btf_ext));
-				obj->btf_ext = NULL;
-			} else {
-				bpf_object__sanitize_btf_ext(obj);
-			}
-		}
+		return -LIBBPF_ERRNO__FORMAT;
 	}
-	if (bpf_object__has_maps(obj)) {
+	err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
+	if (!err)
 		err = bpf_object__init_maps(obj, flags);
-		if (err)
-			goto out;
-	}
-	err = bpf_object__init_prog_names(obj);
-out:
+	if (!err)
+		err = bpf_object__sanitize_and_load_btf(obj);
+	if (!err)
+		err = bpf_object__init_prog_names(obj);
 	return err;
 }
 
@@ -1291,7 +1664,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
 }
 
 struct bpf_program *
-bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+				  const char *title)
 {
 	struct bpf_program *pos;
 
@@ -1313,7 +1687,8 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
 				      int shndx)
 {
-	return shndx == obj->efile.maps_shndx;
+	return shndx == obj->efile.maps_shndx ||
+	       shndx == obj->efile.btf_maps_shndx;
 }
 
 static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
@@ -1346,8 +1721,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	size_t nr_maps = obj->nr_maps;
 	int i, nrels;
 
-	pr_debug("collecting relocating info for: '%s'\n",
-		 prog->section_name);
+	pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
 	nrels = shdr->sh_size / shdr->sh_entsize;
 
 	prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
@@ -1358,23 +1732,21 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	prog->nr_reloc = nrels;
 
 	for (i = 0; i < nrels; i++) {
-		GElf_Sym sym;
-		GElf_Rel rel;
-		unsigned int insn_idx;
-		unsigned int shdr_idx;
 		struct bpf_insn *insns = prog->insns;
 		enum libbpf_map_type type;
+		unsigned int insn_idx;
+		unsigned int shdr_idx;
 		const char *name;
 		size_t map_idx;
+		GElf_Sym sym;
+		GElf_Rel rel;
 
 		if (!gelf_getrel(data, i, &rel)) {
 			pr_warning("relocation: failed to get %d reloc\n", i);
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
-		if (!gelf_getsym(symbols,
-				 GELF_R_SYM(rel.r_info),
-				 &sym)) {
+		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
 			pr_warning("relocation: symbol %"PRIx64" not found\n",
 				   GELF_R_SYM(rel.r_info));
 			return -LIBBPF_ERRNO__FORMAT;
@@ -1435,16 +1807,19 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 				if (maps[map_idx].libbpf_type != type)
 					continue;
 				if (type != LIBBPF_MAP_UNSPEC ||
-				    (type == LIBBPF_MAP_UNSPEC &&
-				     maps[map_idx].offset == sym.st_value)) {
-					pr_debug("relocation: find map %zd (%s) for insn %u\n",
-						 map_idx, maps[map_idx].name, insn_idx);
+				    (maps[map_idx].sec_idx == sym.st_shndx &&
+				     maps[map_idx].sec_offset == sym.st_value)) {
+					pr_debug("relocation: found map %zd (%s, sec_idx %d, offset %zu) for insn %u\n",
+						 map_idx, maps[map_idx].name,
+						 maps[map_idx].sec_idx,
+						 maps[map_idx].sec_offset,
+						 insn_idx);
 					break;
 				}
 			}
 
 			if (map_idx >= nr_maps) {
-				pr_warning("bpf relocation: map_idx %d large than %d\n",
+				pr_warning("bpf relocation: map_idx %d larger than %d\n",
 					   (int)map_idx, (int)nr_maps - 1);
 				return -LIBBPF_ERRNO__RELOC;
 			}
@@ -1458,14 +1833,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	return 0;
 }
 
-static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 {
 	struct bpf_map_def *def = &map->def;
 	__u32 key_type_id = 0, value_type_id = 0;
 	int ret;
 
+	/* if it's BTF-defined map, we don't need to search for type IDs */
+	if (map->sec_idx == obj->efile.btf_maps_shndx)
+		return 0;
+
 	if (!bpf_map__is_internal(map)) {
-		ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
+		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
 					   def->value_size, &key_type_id,
 					   &value_type_id);
 	} else {
@@ -1473,7 +1852,7 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
 		 * LLVM annotates global data differently in BTF, that is,
 		 * only as '.data', '.bss' or '.rodata'.
 		 */
-		ret = btf__find_by_name(btf,
+		ret = btf__find_by_name(obj->btf,
 				libbpf_type_to_btf_name[map->libbpf_type]);
 	}
 	if (ret < 0)
@@ -1737,6 +2116,7 @@ static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
 	struct bpf_create_map_attr create_attr = {};
+	int nr_cpus = 0;
 	unsigned int i;
 	int err;
 
@@ -1759,7 +2139,22 @@ bpf_object__create_maps(struct bpf_object *obj)
 		create_attr.map_flags = def->map_flags;
 		create_attr.key_size = def->key_size;
 		create_attr.value_size = def->value_size;
-		create_attr.max_entries = def->max_entries;
+		if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+		    !def->max_entries) {
+			if (!nr_cpus)
+				nr_cpus = libbpf_num_possible_cpus();
+			if (nr_cpus < 0) {
+				pr_warning("failed to determine number of system CPUs: %d\n",
+					   nr_cpus);
+				err = nr_cpus;
+				goto err_out;
+			}
+			pr_debug("map '%s': setting size to %d\n",
+				 map->name, nr_cpus);
+			create_attr.max_entries = nr_cpus;
+		} else {
+			create_attr.max_entries = def->max_entries;
+		}
 		create_attr.btf_fd = 0;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
@@ -1767,17 +2162,19 @@ bpf_object__create_maps(struct bpf_object *obj)
 		    map->inner_map_fd >= 0)
 			create_attr.inner_map_fd = map->inner_map_fd;
 
-		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+		if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
 			create_attr.btf_fd = btf__fd(obj->btf);
 			create_attr.btf_key_type_id = map->btf_key_type_id;
 			create_attr.btf_value_type_id = map->btf_value_type_id;
 		}
 
 		*pfd = bpf_create_map_xattr(&create_attr);
-		if (*pfd < 0 && create_attr.btf_key_type_id) {
-			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+		if (*pfd < 0 && (create_attr.btf_key_type_id ||
+				 create_attr.btf_value_type_id)) {
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
-				   map->name, cp, errno);
+				   map->name, cp, err);
 			create_attr.btf_fd = 0;
 			create_attr.btf_key_type_id = 0;
 			create_attr.btf_value_type_id = 0;
@@ -1789,11 +2186,11 @@ bpf_object__create_maps(struct bpf_object *obj)
 		if (*pfd < 0) {
 			size_t j;
 
-			err = *pfd;
+			err = -errno;
 err_out:
-			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-			pr_warning("failed to create map (name: '%s'): %s\n",
-				   map->name, cp);
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+			pr_warning("failed to create map (name: '%s'): %s(%d)\n",
+				   map->name, cp, err);
 			for (j = 0; j < i; j++)
 				zclose(obj->maps[j].fd);
 			return err;
@@ -1807,7 +2204,7 @@ err_out:
 			}
 		}
 
-		pr_debug("create map %s: fd=%d\n", map->name, *pfd);
+		pr_debug("created map %s: fd=%d\n", map->name, *pfd);
 	}
 
 	return 0;
@@ -1828,18 +2225,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,
 	if (btf_prog_info) {
 		/*
 		 * Some info has already been found but has problem
-		 * in the last btf_ext reloc.  Must have to error
-		 * out.
+		 * in the last btf_ext reloc. Must have to error out.
 		 */
 		pr_warning("Error in relocating %s for sec %s.\n",
 			   info_name, prog->section_name);
 		return err;
 	}
 
-	/*
-	 * Have problem loading the very first info.  Ignore
-	 * the rest.
-	 */
+	/* Have problem loading the very first info. Ignore the rest. */
 	pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n",
 		   info_name, prog->section_name, info_name);
 	return 0;
@@ -2043,9 +2436,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
-		err = bpf_program__collect_reloc(prog,
-						 shdr, data,
-						 obj);
+		err = bpf_program__collect_reloc(prog, shdr, data, obj);
 		if (err)
 			return err;
 	}
@@ -2062,6 +2453,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	char *log_buf;
 	int ret;
 
+	if (!insns || !insns_cnt)
+		return -EINVAL;
+
 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
 	load_attr.prog_type = prog->type;
 	load_attr.expected_attach_type = prog->expected_attach_type;
@@ -2080,8 +2474,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.line_info_rec_size = prog->line_info_rec_size;
 	load_attr.line_info_cnt = prog->line_info_cnt;
 	load_attr.log_level = prog->log_level;
-	if (!load_attr.insns || !load_attr.insns_cnt)
-		return -EINVAL;
+	load_attr.prog_flags = prog->prog_flags;
 
 retry_load:
 	log_buf = malloc(log_buf_size);
@@ -2219,14 +2612,14 @@ out:
 	return err;
 }
 
-static bool bpf_program__is_function_storage(struct bpf_program *prog,
-					     struct bpf_object *obj)
+static bool bpf_program__is_function_storage(const struct bpf_program *prog,
+					     const struct bpf_object *obj)
 {
 	return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
 }
 
 static int
-bpf_object__load_progs(struct bpf_object *obj)
+bpf_object__load_progs(struct bpf_object *obj, int log_level)
 {
 	size_t i;
 	int err;
@@ -2234,6 +2627,7 @@ bpf_object__load_progs(struct bpf_object *obj)
 	for (i = 0; i < obj->nr_programs; i++) {
 		if (bpf_program__is_function_storage(&obj->programs[i], obj))
 			continue;
+		obj->programs[i].log_level |= log_level;
 		err = bpf_program__load(&obj->programs[i],
 					obj->license,
 					obj->kern_version);
@@ -2270,6 +2664,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		return false;
 	case BPF_PROG_TYPE_KPROBE:
 	default:
@@ -2360,11 +2755,9 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 		snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
 			 (unsigned long)obj_buf,
 			 (unsigned long)obj_buf_sz);
-		tmp_name[sizeof(tmp_name) - 1] = '\0';
 		name = tmp_name;
 	}
-	pr_debug("loading object '%s' from buffer\n",
-		 name);
+	pr_debug("loading object '%s' from buffer\n", name);
 
 	return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true);
 }
@@ -2385,10 +2778,14 @@ int bpf_object__unload(struct bpf_object *obj)
 	return 0;
 }
 
-int bpf_object__load(struct bpf_object *obj)
+int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 {
+	struct bpf_object *obj;
 	int err;
 
+	if (!attr)
+		return -EINVAL;
+	obj = attr->obj;
 	if (!obj)
 		return -EINVAL;
 
@@ -2401,7 +2798,7 @@ int bpf_object__load(struct bpf_object *obj)
 
 	CHECK_ERR(bpf_object__create_maps(obj), err, out);
 	CHECK_ERR(bpf_object__relocate(obj), err, out);
-	CHECK_ERR(bpf_object__load_progs(obj), err, out);
+	CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
 
 	return 0;
 out:
@@ -2410,6 +2807,15 @@ out:
 	return err;
 }
 
+int bpf_object__load(struct bpf_object *obj)
+{
+	struct bpf_object_load_attr attr = {
+		.obj = obj,
+	};
+
+	return bpf_object__load_xattr(&attr);
+}
+
 static int check_path(const char *path)
 {
 	char *cp, errmsg[STRERR_BUFSIZE];
@@ -2914,17 +3320,17 @@ bpf_object__next(struct bpf_object *prev)
 	return next;
 }
 
-const char *bpf_object__name(struct bpf_object *obj)
+const char *bpf_object__name(const struct bpf_object *obj)
 {
 	return obj ? obj->path : ERR_PTR(-EINVAL);
 }
 
-unsigned int bpf_object__kversion(struct bpf_object *obj)
+unsigned int bpf_object__kversion(const struct bpf_object *obj)
 {
 	return obj ? obj->kern_version : 0;
 }
 
-struct btf *bpf_object__btf(struct bpf_object *obj)
+struct btf *bpf_object__btf(const struct bpf_object *obj)
 {
 	return obj ? obj->btf : NULL;
 }
@@ -2945,13 +3351,14 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 	return 0;
 }
 
-void *bpf_object__priv(struct bpf_object *obj)
+void *bpf_object__priv(const struct bpf_object *obj)
 {
 	return obj ? obj->priv : ERR_PTR(-EINVAL);
 }
 
 static struct bpf_program *
-__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward)
+__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
+		    bool forward)
 {
 	size_t nr_programs = obj->nr_programs;
 	ssize_t idx;
@@ -2976,7 +3383,7 @@ __bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward)
 }
 
 struct bpf_program *
-bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
 {
 	struct bpf_program *prog = prev;
 
@@ -2988,7 +3395,7 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 }
 
 struct bpf_program *
-bpf_program__prev(struct bpf_program *next, struct bpf_object *obj)
+bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
 {
 	struct bpf_program *prog = next;
 
@@ -3010,7 +3417,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 	return 0;
 }
 
-void *bpf_program__priv(struct bpf_program *prog)
+void *bpf_program__priv(const struct bpf_program *prog)
 {
 	return prog ? prog->priv : ERR_PTR(-EINVAL);
 }
@@ -3020,7 +3427,7 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
 	prog->prog_ifindex = ifindex;
 }
 
-const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
 {
 	const char *title;
 
@@ -3036,7 +3443,7 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
 	return title;
 }
 
-int bpf_program__fd(struct bpf_program *prog)
+int bpf_program__fd(const struct bpf_program *prog)
 {
 	return bpf_program__nth_fd(prog, 0);
 }
@@ -3069,7 +3476,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
 	return 0;
 }
 
-int bpf_program__nth_fd(struct bpf_program *prog, int n)
+int bpf_program__nth_fd(const struct bpf_program *prog, int n)
 {
 	int fd;
 
@@ -3097,25 +3504,25 @@ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 	prog->type = type;
 }
 
-static bool bpf_program__is_type(struct bpf_program *prog,
+static bool bpf_program__is_type(const struct bpf_program *prog,
 				 enum bpf_prog_type type)
 {
 	return prog ? (prog->type == type) : false;
 }
 
-#define BPF_PROG_TYPE_FNS(NAME, TYPE)			\
-int bpf_program__set_##NAME(struct bpf_program *prog)	\
-{							\
-	if (!prog)					\
-		return -EINVAL;				\
-	bpf_program__set_type(prog, TYPE);		\
-	return 0;					\
-}							\
-							\
-bool bpf_program__is_##NAME(struct bpf_program *prog)	\
-{							\
-	return bpf_program__is_type(prog, TYPE);	\
-}							\
+#define BPF_PROG_TYPE_FNS(NAME, TYPE)				\
+int bpf_program__set_##NAME(struct bpf_program *prog)		\
+{								\
+	if (!prog)						\
+		return -EINVAL;					\
+	bpf_program__set_type(prog, TYPE);			\
+	return 0;						\
+}								\
+								\
+bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
+{								\
+	return bpf_program__is_type(prog, TYPE);		\
+}								\
 
 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
@@ -3216,6 +3623,10 @@ static const struct {
 						BPF_CGROUP_UDP6_RECVMSG),
 	BPF_EAPROG_SEC("cgroup/sysctl",		BPF_PROG_TYPE_CGROUP_SYSCTL,
 						BPF_CGROUP_SYSCTL),
+	BPF_EAPROG_SEC("cgroup/getsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+						BPF_CGROUP_GETSOCKOPT),
+	BPF_EAPROG_SEC("cgroup/setsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+						BPF_CGROUP_SETSOCKOPT),
 };
 
 #undef BPF_PROG_SEC_IMPL
@@ -3314,17 +3725,17 @@ bpf_program__identify_section(struct bpf_program *prog,
 					expected_attach_type);
 }
 
-int bpf_map__fd(struct bpf_map *map)
+int bpf_map__fd(const struct bpf_map *map)
 {
 	return map ? map->fd : -EINVAL;
 }
 
-const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
 {
 	return map ? &map->def : ERR_PTR(-EINVAL);
 }
 
-const char *bpf_map__name(struct bpf_map *map)
+const char *bpf_map__name(const struct bpf_map *map)
 {
 	return map ? map->name : NULL;
 }
@@ -3355,17 +3766,17 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
 	return 0;
 }
 
-void *bpf_map__priv(struct bpf_map *map)
+void *bpf_map__priv(const struct bpf_map *map)
 {
 	return map ? map->priv : ERR_PTR(-EINVAL);
 }
 
-bool bpf_map__is_offload_neutral(struct bpf_map *map)
+bool bpf_map__is_offload_neutral(const struct bpf_map *map)
 {
 	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
 }
 
-bool bpf_map__is_internal(struct bpf_map *map)
+bool bpf_map__is_internal(const struct bpf_map *map)
 {
 	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 }
@@ -3390,7 +3801,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 }
 
 static struct bpf_map *
-__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)
+__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 {
 	ssize_t idx;
 	struct bpf_map *s, *e;
@@ -3414,7 +3825,7 @@ __bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)
 }
 
 struct bpf_map *
-bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
+bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
 {
 	if (prev == NULL)
 		return obj->maps;
@@ -3423,7 +3834,7 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
 }
 
 struct bpf_map *
-bpf_map__prev(struct bpf_map *next, struct bpf_object *obj)
+bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
 {
 	if (next == NULL) {
 		if (!obj->nr_maps)
@@ -3435,7 +3846,7 @@ bpf_map__prev(struct bpf_map *next, struct bpf_object *obj)
 }
 
 struct bpf_map *
-bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
 {
 	struct bpf_map *pos;
 
@@ -3447,7 +3858,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
 }
 
 int
-bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name)
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 {
 	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
 }
@@ -3455,20 +3866,12 @@ bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name)
 struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 {
-	int i;
-
-	for (i = 0; i < obj->nr_maps; i++) {
-		if (obj->maps[i].offset == offset)
-			return &obj->maps[i];
-	}
-	return ERR_PTR(-ENOENT);
+	return ERR_PTR(-ENOTSUP);
 }
 
 long libbpf_get_error(const void *ptr)
 {
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
-	return 0;
+	return PTR_ERR_OR_ZERO(ptr);
 }
 
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
@@ -3487,10 +3890,7 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 			struct bpf_object **pobj, int *prog_fd)
 {
-	struct bpf_object_open_attr open_attr = {
-		.file		= attr->file,
-		.prog_type	= attr->prog_type,
-	};
+	struct bpf_object_open_attr open_attr = {};
 	struct bpf_program *prog, *first_prog = NULL;
 	enum bpf_attach_type expected_attach_type;
 	enum bpf_prog_type prog_type;
@@ -3503,6 +3903,9 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	if (!attr->file)
 		return -EINVAL;
 
+	open_attr.file = attr->file;
+	open_attr.prog_type = attr->prog_type;
+
 	obj = bpf_object__open_xattr(&open_attr);
 	if (IS_ERR_OR_NULL(obj))
 		return -ENOENT;
@@ -3529,6 +3932,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 						      expected_attach_type);
 
 		prog->log_level = attr->log_level;
+		prog->prog_flags = attr->prog_flags;
 		if (!first_prog)
 			first_prog = prog;
 	}
@@ -3555,6 +3959,372 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	return 0;
 }
 
+struct bpf_link {
+	int (*destroy)(struct bpf_link *link);
+};
+
+int bpf_link__destroy(struct bpf_link *link)
+{
+	int err;
+
+	if (!link)
+		return 0;
+
+	err = link->destroy(link);
+	free(link);
+
+	return err;
+}
+
+struct bpf_link_fd {
+	struct bpf_link link; /* has to be at the top of struct */
+	int fd; /* hook FD */
+};
+
+static int bpf_link__destroy_perf_event(struct bpf_link *link)
+{
+	struct bpf_link_fd *l = (void *)link;
+	int err;
+
+	err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
+	if (err)
+		err = -errno;
+
+	close(l->fd);
+	return err;
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
+						int pfd)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *link;
+	int prog_fd, err;
+
+	if (pfd < 0) {
+		pr_warning("program '%s': invalid perf event FD %d\n",
+			   bpf_program__title(prog, false), pfd);
+		return ERR_PTR(-EINVAL);
+	}
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
+			   bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = malloc(sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->link.destroy = &bpf_link__destroy_perf_event;
+	link->fd = pfd;
+
+	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+		err = -errno;
+		free(link);
+		pr_warning("program '%s': failed to attach to pfd %d: %s\n",
+			   bpf_program__title(prog, false), pfd,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return ERR_PTR(err);
+	}
+	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		free(link);
+		pr_warning("program '%s': failed to enable pfd %d: %s\n",
+			   bpf_program__title(prog, false), pfd,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return ERR_PTR(err);
+	}
+	return (struct bpf_link *)link;
+}
+
+/*
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
+ * given file using scanf format string fmt. If actual parsed value is
+ * negative, the result might be indistinguishable from error
+ */
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+	char buf[STRERR_BUFSIZE];
+	int err, ret;
+	FILE *f;
+
+	f = fopen(file, "r");
+	if (!f) {
+		err = -errno;
+		pr_debug("failed to open '%s': %s\n", file,
+			 libbpf_strerror_r(err, buf, sizeof(buf)));
+		return err;
+	}
+	err = fscanf(f, fmt, &ret);
+	if (err != 1) {
+		err = err == EOF ? -EIO : -errno;
+		pr_debug("failed to parse '%s': %s\n", file,
+			libbpf_strerror_r(err, buf, sizeof(buf)));
+		fclose(f);
+		return err;
+	}
+	fclose(f);
+	return ret;
+}
+
+static int determine_kprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_kprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
+				 uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	type = uprobe ? determine_uprobe_perf_type()
+		      : determine_kprobe_perf_type();
+	if (type < 0) {
+		pr_warning("failed to determine %s perf type: %s\n",
+			   uprobe ? "uprobe" : "kprobe",
+			   libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+	if (retprobe) {
+		int bit = uprobe ? determine_uprobe_retprobe_bit()
+				 : determine_kprobe_retprobe_bit();
+
+		if (bit < 0) {
+			pr_warning("failed to determine %s retprobe bit: %s\n",
+				   uprobe ? "uprobe" : "kprobe",
+				   libbpf_strerror_r(bit, errmsg,
+						     sizeof(errmsg)));
+			return bit;
+		}
+		attr.config |= 1 << bit;
+	}
+	attr.size = sizeof(attr);
+	attr.type = type;
+	attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */
+	attr.config2 = offset;		       /* kprobe_addr or probe_offset */
+
+	/* pid filter is meaningful only for uprobes */
+	pfd = syscall(__NR_perf_event_open, &attr,
+		      pid < 0 ? -1 : pid /* pid */,
+		      pid == -1 ? 0 : -1 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warning("%s perf_event_open() failed: %s\n",
+			   uprobe ? "uprobe" : "kprobe",
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+					    bool retprobe,
+					    const char *func_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+				    0 /* offset */, -1 /* pid */);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create %s '%s' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "kretprobe" : "kprobe", func_name,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to %s '%s': %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "kretprobe" : "kprobe", func_name,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+					    bool retprobe, pid_t pid,
+					    const char *binary_path,
+					    size_t func_offset)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(true /* uprobe */, retprobe,
+				    binary_path, func_offset, pid);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "uretprobe" : "uprobe",
+			   binary_path, func_offset,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "uretprobe" : "uprobe",
+			   binary_path, func_offset,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+static int determine_tracepoint_id(const char *tp_category,
+				   const char *tp_name)
+{
+	char file[PATH_MAX];
+	int ret;
+
+	ret = snprintf(file, sizeof(file),
+		       "/sys/kernel/debug/tracing/events/%s/%s/id",
+		       tp_category, tp_name);
+	if (ret < 0)
+		return -errno;
+	if (ret >= sizeof(file)) {
+		pr_debug("tracepoint %s/%s path is too long\n",
+			 tp_category, tp_name);
+		return -E2BIG;
+	}
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int perf_event_open_tracepoint(const char *tp_category,
+				      const char *tp_name)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int tp_id, pfd, err;
+
+	tp_id = determine_tracepoint_id(tp_category, tp_name);
+	if (tp_id < 0) {
+		pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
+			   tp_category, tp_name,
+			   libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
+		return tp_id;
+	}
+
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.size = sizeof(attr);
+	attr.config = tp_id;
+
+	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n",
+			   tp_category, tp_name,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+						const char *tp_category,
+						const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_tracepoint(tp_category, tp_name);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   tp_category, tp_name,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
+			   bpf_program__title(prog, false),
+			   tp_category, tp_name,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+static int bpf_link__destroy_fd(struct bpf_link *link)
+{
+	struct bpf_link_fd *l = (void *)link;
+
+	return close(l->fd);
+}
+
+struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+						    const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warning("program '%s': can't attach before loaded\n",
+			   bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = malloc(sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->link.destroy = &bpf_link__destroy_fd;
+
+	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
+	if (pfd < 0) {
+		pfd = -errno;
+		free(link);
+		pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n",
+			   bpf_program__title(prog, false), tp_name,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link->fd = pfd;
+	return (struct bpf_link *)link;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
@@ -3603,6 +4373,370 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 	return ret;
 }
 
+struct perf_buffer;
+
+struct perf_buffer_params {
+	struct perf_event_attr *attr;
+	/* if event_cb is specified, it takes precendence */
+	perf_buffer_event_fn event_cb;
+	/* sample_cb and lost_cb are higher-level common-case callbacks */
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx;
+	int cpu_cnt;
+	int *cpus;
+	int *map_keys;
+};
+
+struct perf_cpu_buf {
+	struct perf_buffer *pb;
+	void *base; /* mmap()'ed memory */
+	void *buf; /* for reconstructing segmented data */
+	size_t buf_size;
+	int fd;
+	int cpu;
+	int map_key;
+};
+
+struct perf_buffer {
+	perf_buffer_event_fn event_cb;
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx; /* passed into callbacks */
+
+	size_t page_size;
+	size_t mmap_size;
+	struct perf_cpu_buf **cpu_bufs;
+	struct epoll_event *events;
+	int cpu_cnt;
+	int epoll_fd; /* perf event FD */
+	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
+				      struct perf_cpu_buf *cpu_buf)
+{
+	if (!cpu_buf)
+		return;
+	if (cpu_buf->base &&
+	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
+		pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
+	if (cpu_buf->fd >= 0) {
+		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
+		close(cpu_buf->fd);
+	}
+	free(cpu_buf->buf);
+	free(cpu_buf);
+}
+
+void perf_buffer__free(struct perf_buffer *pb)
+{
+	int i;
+
+	if (!pb)
+		return;
+	if (pb->cpu_bufs) {
+		for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
+			perf_buffer__free_cpu_buf(pb, cpu_buf);
+		}
+		free(pb->cpu_bufs);
+	}
+	if (pb->epoll_fd >= 0)
+		close(pb->epoll_fd);
+	free(pb->events);
+	free(pb);
+}
+
+static struct perf_cpu_buf *
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
+			  int cpu, int map_key)
+{
+	struct perf_cpu_buf *cpu_buf;
+	char msg[STRERR_BUFSIZE];
+	int err;
+
+	cpu_buf = calloc(1, sizeof(*cpu_buf));
+	if (!cpu_buf)
+		return ERR_PTR(-ENOMEM);
+
+	cpu_buf->pb = pb;
+	cpu_buf->cpu = cpu;
+	cpu_buf->map_key = map_key;
+
+	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
+			      -1, PERF_FLAG_FD_CLOEXEC);
+	if (cpu_buf->fd < 0) {
+		err = -errno;
+		pr_warning("failed to open perf buffer event on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
+			     PROT_READ | PROT_WRITE, MAP_SHARED,
+			     cpu_buf->fd, 0);
+	if (cpu_buf->base == MAP_FAILED) {
+		cpu_buf->base = NULL;
+		err = -errno;
+		pr_warning("failed to mmap perf buffer on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		pr_warning("failed to enable perf buffer event on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	return cpu_buf;
+
+error:
+	perf_buffer__free_cpu_buf(pb, cpu_buf);
+	return (struct perf_cpu_buf *)ERR_PTR(err);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p);
+
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+				     const struct perf_buffer_opts *opts)
+{
+	struct perf_buffer_params p = {};
+	struct perf_event_attr attr = {
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.type = PERF_TYPE_SOFTWARE,
+		.sample_type = PERF_SAMPLE_RAW,
+		.sample_period = 1,
+		.wakeup_events = 1,
+	};
+
+	p.attr = &attr;
+	p.sample_cb = opts ? opts->sample_cb : NULL;
+	p.lost_cb = opts ? opts->lost_cb : NULL;
+	p.ctx = opts ? opts->ctx : NULL;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts)
+{
+	struct perf_buffer_params p = {};
+
+	p.attr = opts->attr;
+	p.event_cb = opts->event_cb;
+	p.ctx = opts->ctx;
+	p.cpu_cnt = opts->cpu_cnt;
+	p.cpus = opts->cpus;
+	p.map_keys = opts->map_keys;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p)
+{
+	struct bpf_map_info map = {};
+	char msg[STRERR_BUFSIZE];
+	struct perf_buffer *pb;
+	__u32 map_info_len;
+	int err, i;
+
+	if (page_cnt & (page_cnt - 1)) {
+		pr_warning("page count should be power of two, but is %zu\n",
+			   page_cnt);
+		return ERR_PTR(-EINVAL);
+	}
+
+	map_info_len = sizeof(map);
+	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
+	if (err) {
+		err = -errno;
+		pr_warning("failed to get map info for map FD %d: %s\n",
+			   map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+		return ERR_PTR(err);
+	}
+
+	if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			   map.name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	pb = calloc(1, sizeof(*pb));
+	if (!pb)
+		return ERR_PTR(-ENOMEM);
+
+	pb->event_cb = p->event_cb;
+	pb->sample_cb = p->sample_cb;
+	pb->lost_cb = p->lost_cb;
+	pb->ctx = p->ctx;
+
+	pb->page_size = getpagesize();
+	pb->mmap_size = pb->page_size * page_cnt;
+	pb->map_fd = map_fd;
+
+	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (pb->epoll_fd < 0) {
+		err = -errno;
+		pr_warning("failed to create epoll instance: %s\n",
+			   libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (p->cpu_cnt > 0) {
+		pb->cpu_cnt = p->cpu_cnt;
+	} else {
+		pb->cpu_cnt = libbpf_num_possible_cpus();
+		if (pb->cpu_cnt < 0) {
+			err = pb->cpu_cnt;
+			goto error;
+		}
+		if (map.max_entries < pb->cpu_cnt)
+			pb->cpu_cnt = map.max_entries;
+	}
+
+	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
+	if (!pb->events) {
+		err = -ENOMEM;
+		pr_warning("failed to allocate events: out of memory\n");
+		goto error;
+	}
+	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
+	if (!pb->cpu_bufs) {
+		err = -ENOMEM;
+		pr_warning("failed to allocate buffers: out of memory\n");
+		goto error;
+	}
+
+	for (i = 0; i < pb->cpu_cnt; i++) {
+		struct perf_cpu_buf *cpu_buf;
+		int cpu, map_key;
+
+		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
+		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+
+		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
+		if (IS_ERR(cpu_buf)) {
+			err = PTR_ERR(cpu_buf);
+			goto error;
+		}
+
+		pb->cpu_bufs[i] = cpu_buf;
+
+		err = bpf_map_update_elem(pb->map_fd, &map_key,
+					  &cpu_buf->fd, 0);
+		if (err) {
+			err = -errno;
+			pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
+				   cpu, map_key, cpu_buf->fd,
+				   libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+
+		pb->events[i].events = EPOLLIN;
+		pb->events[i].data.ptr = cpu_buf;
+		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
+			      &pb->events[i]) < 0) {
+			err = -errno;
+			pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
+				   cpu, cpu_buf->fd,
+				   libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+	}
+
+	return pb;
+
+error:
+	if (pb)
+		perf_buffer__free(pb);
+	return ERR_PTR(err);
+}
+
+struct perf_sample_raw {
+	struct perf_event_header header;
+	uint32_t size;
+	char data[0];
+};
+
+struct perf_sample_lost {
+	struct perf_event_header header;
+	uint64_t id;
+	uint64_t lost;
+	uint64_t sample_id;
+};
+
+static enum bpf_perf_event_ret
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
+{
+	struct perf_cpu_buf *cpu_buf = ctx;
+	struct perf_buffer *pb = cpu_buf->pb;
+	void *data = e;
+
+	/* user wants full control over parsing perf event */
+	if (pb->event_cb)
+		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
+
+	switch (e->type) {
+	case PERF_RECORD_SAMPLE: {
+		struct perf_sample_raw *s = data;
+
+		if (pb->sample_cb)
+			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
+		break;
+	}
+	case PERF_RECORD_LOST: {
+		struct perf_sample_lost *s = data;
+
+		if (pb->lost_cb)
+			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
+		break;
+	}
+	default:
+		pr_warning("unknown perf sample type %d\n", e->type);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static int perf_buffer__process_records(struct perf_buffer *pb,
+					struct perf_cpu_buf *cpu_buf)
+{
+	enum bpf_perf_event_ret ret;
+
+	ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+					 pb->page_size, &cpu_buf->buf,
+					 &cpu_buf->buf_size,
+					 perf_buffer__process_record, cpu_buf);
+	if (ret != LIBBPF_PERF_EVENT_CONT)
+		return ret;
+	return 0;
+}
+
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
+{
+	int i, cnt, err;
+
+	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+	for (i = 0; i < cnt; i++) {
+		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
+
+		err = perf_buffer__process_records(pb, cpu_buf);
+		if (err) {
+			pr_warning("error while processing records: %d\n", err);
+			return err;
+		}
+	}
+	return cnt < 0 ? -errno : cnt;
+}
+
 struct bpf_prog_info_array_desc {
 	int	array_offset;	/* e.g. offset of jited_prog_insns */
 	int	count_offset;	/* e.g. offset of jited_prog_len */
@@ -3848,3 +4982,60 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
 					     desc->array_offset, addr);
 	}
 }
+
+int libbpf_num_possible_cpus(void)
+{
+	static const char *fcpu = "/sys/devices/system/cpu/possible";
+	int len = 0, n = 0, il = 0, ir = 0;
+	unsigned int start = 0, end = 0;
+	static int cpus;
+	char buf[128];
+	int error = 0;
+	int fd = -1;
+
+	if (cpus > 0)
+		return cpus;
+
+	fd = open(fcpu, O_RDONLY);
+	if (fd < 0) {
+		error = errno;
+		pr_warning("Failed to open file %s: %s\n",
+			   fcpu, strerror(error));
+		return -error;
+	}
+	len = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (len <= 0) {
+		error = len ? errno : EINVAL;
+		pr_warning("Failed to read # of possible cpus from %s: %s\n",
+			   fcpu, strerror(error));
+		return -error;
+	}
+	if (len == sizeof(buf)) {
+		pr_warning("File %s size overflow\n", fcpu);
+		return -EOVERFLOW;
+	}
+	buf[len] = '\0';
+
+	for (ir = 0, cpus = 0; ir <= len; ir++) {
+		/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
+		if (buf[ir] == ',' || buf[ir] == '\0') {
+			buf[ir] = '\0';
+			n = sscanf(&buf[il], "%u-%u", &start, &end);
+			if (n <= 0) {
+				pr_warning("Failed to get # CPUs from %s\n",
+					   &buf[il]);
+				return -EINVAL;
+			} else if (n == 1) {
+				end = start;
+			}
+			cpus += end - start + 1;
+			il = ir + 1;
+		}
+	}
+	if (cpus <= 0) {
+		pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu);
+		return -EINVAL;
+	}
+	return cpus;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c5ff00515ce7..5cbf459ece0b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -89,18 +89,25 @@ LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
 LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
 LIBBPF_API void bpf_object__close(struct bpf_object *object);
 
+struct bpf_object_load_attr {
+	struct bpf_object *obj;
+	int log_level;
+};
+
 /* Load/unload object into/from kernel */
 LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
 LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
-LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
-LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
+LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
+LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
 
 struct btf;
-LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj);
+LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
 LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
 
 LIBBPF_API struct bpf_program *
-bpf_object__find_program_by_title(struct bpf_object *obj, const char *title);
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+				  const char *title);
 
 LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
 #define bpf_object__for_each_safe(pos, tmp)			\
@@ -112,7 +119,7 @@ LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
 typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
 LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 				    bpf_object_clear_priv_t clear_priv);
-LIBBPF_API void *bpf_object__priv(struct bpf_object *prog);
+LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
 
 LIBBPF_API int
 libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
@@ -123,7 +130,7 @@ LIBBPF_API int libbpf_attach_type_by_name(const char *name,
 /* Accessors of bpf_program */
 struct bpf_program;
 LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
-						 struct bpf_object *obj);
+						 const struct bpf_object *obj);
 
 #define bpf_object__for_each_program(pos, obj)		\
 	for ((pos) = bpf_program__next(NULL, (obj));	\
@@ -131,24 +138,23 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
 	     (pos) = bpf_program__next((pos), (obj)))
 
 LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
-						 struct bpf_object *obj);
+						 const struct bpf_object *obj);
 
-typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
-					 void *);
+typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
 
 LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 				     bpf_program_clear_priv_t clear_priv);
 
-LIBBPF_API void *bpf_program__priv(struct bpf_program *prog);
+LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
 					 __u32 ifindex);
 
-LIBBPF_API const char *bpf_program__title(struct bpf_program *prog,
+LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
 					  bool needs_copy);
 
 LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
 				 __u32 kern_version);
-LIBBPF_API int bpf_program__fd(struct bpf_program *prog);
+LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
 LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
 					 const char *path,
 					 int instance);
@@ -159,6 +165,27 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
 LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
 LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
 
+struct bpf_link;
+
+LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+			   const char *func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+			   pid_t pid, const char *binary_path,
+			   size_t func_offset);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint(struct bpf_program *prog,
+			       const char *tp_category,
+			       const char *tp_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+				   const char *tp_name);
+
 struct bpf_insn;
 
 /*
@@ -221,7 +248,7 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
 LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
 				     bpf_program_prep_t prep);
 
-LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n);
+LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
 
 /*
  * Adjust type of BPF program. Default is kprobe.
@@ -240,14 +267,14 @@ LIBBPF_API void
 bpf_program__set_expected_attach_type(struct bpf_program *prog,
 				      enum bpf_attach_type type);
 
-LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -269,10 +296,10 @@ struct bpf_map_def {
  */
 struct bpf_map;
 LIBBPF_API struct bpf_map *
-bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
 
 LIBBPF_API int
-bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name);
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
 
 /*
  * Get bpf_map through the offset of corresponding struct bpf_map_def
@@ -282,7 +309,7 @@ LIBBPF_API struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
 
 LIBBPF_API struct bpf_map *
-bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
+bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
 #define bpf_object__for_each_map(pos, obj)		\
 	for ((pos) = bpf_map__next(NULL, (obj));	\
 	     (pos) != NULL;				\
@@ -290,22 +317,22 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 #define bpf_map__for_each bpf_object__for_each_map
 
 LIBBPF_API struct bpf_map *
-bpf_map__prev(struct bpf_map *map, struct bpf_object *obj);
+bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
 
-LIBBPF_API int bpf_map__fd(struct bpf_map *map);
-LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
-LIBBPF_API const char *bpf_map__name(struct bpf_map *map);
+LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
 LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
 				 bpf_map_clear_priv_t clear_priv);
-LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
+LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
-LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
-LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
@@ -320,6 +347,7 @@ struct bpf_prog_load_attr {
 	enum bpf_attach_type expected_attach_type;
 	int ifindex;
 	int log_level;
+	int prog_flags;
 };
 
 LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
@@ -330,6 +358,26 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
 
+struct perf_buffer;
+
+typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
+				      void *data, __u32 size);
+typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
+
+/* common use perf buffer options */
+struct perf_buffer_opts {
+	/* if specified, sample_cb is called for each sample */
+	perf_buffer_sample_fn sample_cb;
+	/* if specified, lost_cb is called for each batch of lost samples */
+	perf_buffer_lost_fn lost_cb;
+	/* ctx is provided to sample_cb and lost_cb */
+	void *ctx;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new(int map_fd, size_t page_cnt,
+		 const struct perf_buffer_opts *opts);
+
 enum bpf_perf_event_ret {
 	LIBBPF_PERF_EVENT_DONE	= 0,
 	LIBBPF_PERF_EVENT_ERROR	= -1,
@@ -337,6 +385,35 @@ enum bpf_perf_event_ret {
 };
 
 struct perf_event_header;
+
+typedef enum bpf_perf_event_ret
+(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event);
+
+/* raw perf buffer options, giving most power and control */
+struct perf_buffer_raw_opts {
+	/* perf event attrs passed directly into perf_event_open() */
+	struct perf_event_attr *attr;
+	/* raw event callback */
+	perf_buffer_event_fn event_cb;
+	/* ctx is provided to event_cb */
+	void *ctx;
+	/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
+	 * max_entries of given PERF_EVENT_ARRAY map)
+	 */
+	int cpu_cnt;
+	/* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */
+	int *cpus;
+	/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
+	int *map_keys;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+
 typedef enum bpf_perf_event_ret
 	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
 				  void *private_data);
@@ -447,6 +524,22 @@ bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
 LIBBPF_API void
 bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
 
+/*
+ * A helper function to get the number of possible CPUs before looking up
+ * per-CPU maps. Negative errno is returned on failure.
+ *
+ * Example usage:
+ *
+ *     int ncpus = libbpf_num_possible_cpus();
+ *     if (ncpus < 0) {
+ *          // error handling
+ *     }
+ *     long values[ncpus];
+ *     bpf_map_lookup_elem(per_cpu_map_fd, key, values);
+ *
+ */
+LIBBPF_API int libbpf_num_possible_cpus(void);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 673001787cba..f9d316e873d8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -164,3 +164,23 @@ LIBBPF_0.0.3 {
 		bpf_map_freeze;
 		btf__finalize_data;
 } LIBBPF_0.0.2;
+
+LIBBPF_0.0.4 {
+	global:
+		bpf_link__destroy;
+		bpf_object__load_xattr;
+		bpf_program__attach_kprobe;
+		bpf_program__attach_perf_event;
+		bpf_program__attach_raw_tracepoint;
+		bpf_program__attach_tracepoint;
+		bpf_program__attach_uprobe;
+		btf_dump__dump_type;
+		btf_dump__free;
+		btf_dump__new;
+		btf__parse_elf;
+		libbpf_num_possible_cpus;
+		perf_buffer__free;
+		perf_buffer__new;
+		perf_buffer__new_raw;
+		perf_buffer__poll;
+} LIBBPF_0.0.3;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index dfab8012185c..2ac29bd36226 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -9,6 +9,8 @@
 #ifndef __LIBBPF_LIBBPF_INTERNAL_H
 #define __LIBBPF_LIBBPF_INTERNAL_H
 
+#include "libbpf.h"
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
@@ -21,6 +23,13 @@
 #define BTF_PARAM_ENC(name, type) (name), (type)
 #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
 
+#ifndef min
+# define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
+#ifndef max
+# define max(x, y) ((x) < (y) ? (y) : (x))
+#endif
+
 extern void libbpf_print(enum libbpf_print_level level,
 			 const char *format, ...)
 	__attribute__((format(printf, 2, 3)));
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 6635a31a7a16..ace1a0708d99 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -101,6 +101,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_SK_REUSEPORT:
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	default:
 		break;
 	}
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
index 00e48ac5b806..b8064eedc177 100644
--- a/tools/lib/bpf/str_error.c
+++ b/tools/lib/bpf/str_error.c
@@ -11,7 +11,7 @@
  */
 char *libbpf_strerror_r(int err, char *dst, int len)
 {
-	int ret = strerror_r(err, dst, len);
+	int ret = strerror_r(err < 0 ? -err : err, dst, len);
 	if (ret)
 		snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
 	return dst;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 38667b62f1fe..b33740221b7e 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -60,13 +60,12 @@ struct xsk_socket {
 	struct xsk_umem *umem;
 	struct xsk_socket_config config;
 	int fd;
-	int xsks_map;
 	int ifindex;
 	int prog_fd;
-	int qidconf_map_fd;
 	int xsks_map_fd;
 	__u32 queue_id;
 	char ifname[IFNAMSIZ];
+	bool zc;
 };
 
 struct xsk_nl_info {
@@ -265,15 +264,11 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 	/* This is the C-program:
 	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
 	 * {
-	 *     int *qidconf, index = ctx->rx_queue_index;
+	 *     int index = ctx->rx_queue_index;
 	 *
 	 *     // A set entry here means that the correspnding queue_id
 	 *     // has an active AF_XDP socket bound to it.
-	 *     qidconf = bpf_map_lookup_elem(&qidconf_map, &index);
-	 *     if (!qidconf)
-	 *         return XDP_ABORTED;
-	 *
-	 *     if (*qidconf)
+	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
 	 *         return bpf_redirect_map(&xsks_map, index, 0);
 	 *
 	 *     return XDP_PASS;
@@ -286,15 +281,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
 		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
-		BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
 		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-		BPF_MOV32_IMM(BPF_REG_0, 0),
-		/* if r1 == 0 goto +8 */
-		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
 		BPF_MOV32_IMM(BPF_REG_0, 2),
-		/* r1 = *(u32 *)(r1 + 0) */
-		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
 		/* if r1 == 0 goto +5 */
 		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
 		/* r2 = *(u32 *)(r10 - 4) */
@@ -337,7 +327,8 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
 
 	channels.cmd = ETHTOOL_GCHANNELS;
 	ifr.ifr_data = (void *)&channels;
-	strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ);
+	strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
 	err = ioctl(fd, SIOCETHTOOL, &ifr);
 	if (err && errno != EOPNOTSUPP) {
 		ret = -errno;
@@ -366,18 +357,11 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 	if (max_queues < 0)
 		return max_queues;
 
-	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map",
+	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
 				 sizeof(int), sizeof(int), max_queues, 0);
 	if (fd < 0)
 		return fd;
-	xsk->qidconf_map_fd = fd;
 
-	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
-				 sizeof(int), sizeof(int), max_queues, 0);
-	if (fd < 0) {
-		close(xsk->qidconf_map_fd);
-		return fd;
-	}
 	xsk->xsks_map_fd = fd;
 
 	return 0;
@@ -385,10 +369,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 
 static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
 {
-	close(xsk->qidconf_map_fd);
+	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
 	close(xsk->xsks_map_fd);
-	xsk->qidconf_map_fd = -1;
-	xsk->xsks_map_fd = -1;
 }
 
 static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@@ -417,10 +399,9 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 	if (err)
 		goto out_map_ids;
 
-	for (i = 0; i < prog_info.nr_map_ids; i++) {
-		if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
-			break;
+	xsk->xsks_map_fd = -1;
 
+	for (i = 0; i < prog_info.nr_map_ids; i++) {
 		fd = bpf_map_get_fd_by_id(map_ids[i]);
 		if (fd < 0)
 			continue;
@@ -431,11 +412,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 			continue;
 		}
 
-		if (!strcmp(map_info.name, "qidconf_map")) {
-			xsk->qidconf_map_fd = fd;
-			continue;
-		}
-
 		if (!strcmp(map_info.name, "xsks_map")) {
 			xsk->xsks_map_fd = fd;
 			continue;
@@ -445,40 +421,18 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 	}
 
 	err = 0;
-	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
+	if (xsk->xsks_map_fd == -1)
 		err = -ENOENT;
-		xsk_delete_bpf_maps(xsk);
-	}
 
 out_map_ids:
 	free(map_ids);
 	return err;
 }
 
-static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
-{
-	int qid = false;
-
-	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
-	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
-}
-
 static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 {
-	int qid = true, fd = xsk->fd, err;
-
-	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
-	if (err)
-		goto out;
-
-	err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
-	if (err)
-		goto out;
-
-	return 0;
-out:
-	xsk_clear_bpf_maps(xsk);
-	return err;
+	return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
+				   &xsk->fd, 0);
 }
 
 static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
@@ -497,26 +451,27 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 			return err;
 
 		err = xsk_load_xdp_prog(xsk);
-		if (err)
-			goto out_maps;
+		if (err) {
+			xsk_delete_bpf_maps(xsk);
+			return err;
+		}
 	} else {
 		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
 		err = xsk_lookup_bpf_maps(xsk);
-		if (err)
-			goto out_load;
+		if (err) {
+			close(xsk->prog_fd);
+			return err;
+		}
 	}
 
 	err = xsk_set_bpf_maps(xsk);
-	if (err)
-		goto out_load;
+	if (err) {
+		xsk_delete_bpf_maps(xsk);
+		close(xsk->prog_fd);
+		return err;
+	}
 
 	return 0;
-
-out_load:
-	close(xsk->prog_fd);
-out_maps:
-	xsk_delete_bpf_maps(xsk);
-	return err;
 }
 
 int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
@@ -527,6 +482,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	void *rx_map = NULL, *tx_map = NULL;
 	struct sockaddr_xdp sxdp = {};
 	struct xdp_mmap_offsets off;
+	struct xdp_options opts;
 	struct xsk_socket *xsk;
 	socklen_t optlen;
 	int err;
@@ -643,8 +599,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		goto out_mmap_tx;
 	}
 
-	xsk->qidconf_map_fd = -1;
-	xsk->xsks_map_fd = -1;
+	xsk->prog_fd = -1;
+
+	optlen = sizeof(opts);
+	err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen);
+	if (err) {
+		err = -errno;
+		goto out_mmap_tx;
+	}
+
+	xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY;
 
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
 		err = xsk_setup_xdp_prog(xsk);
@@ -708,8 +672,10 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	if (!xsk)
 		return;
 
-	xsk_clear_bpf_maps(xsk);
-	xsk_delete_bpf_maps(xsk);
+	if (xsk->prog_fd != -1) {
+		xsk_delete_bpf_maps(xsk);
+		close(xsk->prog_fd);
+	}
 
 	optlen = sizeof(off);
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 82ea71a0f3ec..833a6e60d065 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -167,7 +167,7 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
 
 #define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
 #define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
-#define XSK_UMEM__DEFAULT_FRAME_SHIFT    11 /* 2048 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
 #define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
 #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
 
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index dd5d69529382..90f70d2c7c22 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -22,6 +22,7 @@ test_lirc_mode2_user
 get_cgroup_id_user
 test_skb_cgroup_id_user
 test_socket_cookie
+test_cgroup_attach
 test_cgroup_storage
 test_select_reuseport
 test_flow_dissector
@@ -35,3 +36,10 @@ test_sysctl
 alu32
 libbpf.pc
 libbpf.so.*
+test_hashmap
+test_btf_dump
+xdping
+test_sockopt
+test_sockopt_sk
+test_sockopt_multi
+test_tcp_rtt
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e36356e2377e..2620406a53ec 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -15,7 +15,9 @@ LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 LLVM_READELF	?= llvm-readelf
 BTF_PAHOLE	?= pahole
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
+	  -Dbpf_prog_load=bpf_prog_test_load \
+	  -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lrt -lpthread
 
 # Order correspond to 'make run_tests' order
@@ -23,7 +25,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage test_select_reuseport test_section_names \
-	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl
+	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
+	test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
+	test_sockopt_multi test_tcp_rtt
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -43,6 +47,7 @@ TEST_PROGS := test_kmod.sh \
 	test_libbpf.sh \
 	test_xdp_redirect.sh \
 	test_xdp_meta.sh \
+	test_xdp_veth.sh \
 	test_offload.py \
 	test_sock_addr.sh \
 	test_tunnel.sh \
@@ -54,7 +59,8 @@ TEST_PROGS := test_kmod.sh \
 	test_lwt_ip_encap.sh \
 	test_tcp_check_syncookie.sh \
 	test_tc_tunnel.sh \
-	test_tc_edt.sh
+	test_tc_edt.sh \
+	test_xdping.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
@@ -79,9 +85,9 @@ $(OUTPUT)/test_maps: map_tests/*.c
 
 BPFOBJ := $(OUTPUT)/libbpf.a
 
-$(TEST_GEN_PROGS): $(BPFOBJ)
+$(TEST_GEN_PROGS): test_stub.o $(BPFOBJ)
 
-$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+$(TEST_GEN_PROGS_EXTENDED): test_stub.o $(OUTPUT)/libbpf.a
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -97,6 +103,11 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
 $(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
+$(OUTPUT)/test_sockopt: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
+$(OUTPUT)/test_tcp_rtt: cgroup_helpers.c
 
 .PHONY: force
 
@@ -177,7 +188,7 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\
 						$(ALU32_BUILD_DIR)/urandom_read
 	$(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \
 		-o $(ALU32_BUILD_DIR)/test_progs_32 \
-		test_progs.c trace_helpers.c prog_tests/*.c \
+		test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \
 		$(OUTPUT)/libbpf.a $(LDLIBS)
 
 $(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H)
@@ -275,4 +286,5 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES)
 		 ) > $(VERIFIER_TESTS_H))
 
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \
-	$(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H)
+	$(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \
+	feature
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index b25595ea4a78..05f036df8a4c 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -2,6 +2,7 @@
 #ifndef __BPF_ENDIAN__
 #define __BPF_ENDIAN__
 
+#include <linux/stddef.h>
 #include <linux/swab.h>
 
 /* LLVM's BPF target selects the endianness of the CPU
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 5f6f9e7aba2a..5a3d92c8bec8 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -8,6 +8,17 @@
  */
 #define SEC(NAME) __attribute__((section(NAME), used))
 
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) val *name
+
+/* helper macro to print out debug messages */
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			 ##__VA_ARGS__);		\
+})
+
 /* helper functions called from eBPF programs written in C */
 static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
 	(void *) BPF_FUNC_map_lookup_elem;
@@ -23,7 +34,7 @@ static int (*bpf_map_pop_elem)(void *map, void *value) =
 	(void *) BPF_FUNC_map_pop_elem;
 static int (*bpf_map_peek_elem)(void *map, void *value) =
 	(void *) BPF_FUNC_map_peek_elem;
-static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) =
 	(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
 	(void *) BPF_FUNC_ktime_get_ns;
@@ -54,7 +65,7 @@ static int (*bpf_perf_event_output)(void *ctx, void *map,
 	(void *) BPF_FUNC_perf_event_output;
 static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
 	(void *) BPF_FUNC_get_stackid;
-static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =
 	(void *) BPF_FUNC_probe_write_user;
 static int (*bpf_current_task_under_cgroup)(void *map, int index) =
 	(void *) BPF_FUNC_current_task_under_cgroup;
@@ -216,6 +227,7 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
 	(void *) BPF_FUNC_sk_storage_get;
 static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
 	(void *)BPF_FUNC_sk_storage_delete;
+static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a29206ebbd13..ec219f84e041 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -6,44 +6,17 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <libbpf.h> /* libbpf_num_possible_cpus */
 
 static inline unsigned int bpf_num_possible_cpus(void)
 {
-	static const char *fcpu = "/sys/devices/system/cpu/possible";
-	unsigned int start, end, possible_cpus = 0;
-	char buff[128];
-	FILE *fp;
-	int len, n, i, j = 0;
+	int possible_cpus = libbpf_num_possible_cpus();
 
-	fp = fopen(fcpu, "r");
-	if (!fp) {
-		printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno));
+	if (possible_cpus < 0) {
+		printf("Failed to get # of possible cpus: '%s'!\n",
+		       strerror(-possible_cpus));
 		exit(1);
 	}
-
-	if (!fgets(buff, sizeof(buff), fp)) {
-		printf("Failed to read %s!\n", fcpu);
-		exit(1);
-	}
-
-	len = strlen(buff);
-	for (i = 0; i <= len; i++) {
-		if (buff[i] == ',' || buff[i] == '\0') {
-			buff[i] = '\0';
-			n = sscanf(&buff[j], "%u-%u", &start, &end);
-			if (n <= 0) {
-				printf("Failed to retrieve # possible CPUs!\n");
-				exit(1);
-			} else if (n == 1) {
-				end = start;
-			}
-			possible_cpus += end - start + 1;
-			j = i + 1;
-		}
-	}
-
-	fclose(fp);
-
 	return possible_cpus;
 }
 
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 6692a40a6979..e95c33e333a4 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -34,6 +34,60 @@
 		 CGROUP_WORK_DIR, path)
 
 /**
+ * enable_all_controllers() - Enable all available cgroup v2 controllers
+ *
+ * Enable all available cgroup v2 controllers in order to increase
+ * the code coverage.
+ *
+ * If successful, 0 is returned.
+ */
+int enable_all_controllers(char *cgroup_path)
+{
+	char path[PATH_MAX + 1];
+	char buf[PATH_MAX];
+	char *c, *c2;
+	int fd, cfd;
+	ssize_t len;
+
+	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		log_err("Opening cgroup.controllers: %s", path);
+		return 1;
+	}
+
+	len = read(fd, buf, sizeof(buf) - 1);
+	if (len < 0) {
+		close(fd);
+		log_err("Reading cgroup.controllers: %s", path);
+		return 1;
+	}
+	buf[len] = 0;
+	close(fd);
+
+	/* No controllers available? We're probably on cgroup v1. */
+	if (len == 0)
+		return 0;
+
+	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
+	cfd = open(path, O_RDWR);
+	if (cfd < 0) {
+		log_err("Opening cgroup.subtree_control: %s", path);
+		return 1;
+	}
+
+	for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
+		if (dprintf(cfd, "+%s\n", c) <= 0) {
+			log_err("Enabling controller %s: %s", c, path);
+			close(cfd);
+			return 1;
+		}
+	}
+	close(cfd);
+	return 0;
+}
+
+/**
  * setup_cgroup_environment() - Setup the cgroup environment
  *
  * After calling this function, cleanup_cgroup_environment should be called
@@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
 		return 1;
 	}
 
+	if (enable_all_controllers(cgroup_workdir))
+		return 1;
+
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
new file mode 100644
index 000000000000..a4686395522c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+ssize_t get_base_addr() {
+	size_t start;
+	char buf[256];
+	FILE *f;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f)
+		return -errno;
+
+	while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) {
+		if (strcmp(buf, "r-xp") == 0) {
+			fclose(f);
+			return start;
+		}
+	}
+
+	fclose(f);
+	return -EINVAL;
+}
+
+#ifdef __x86_64__
+#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
+#else
+#define SYS_KPROBE_NAME "sys_nanosleep"
+#endif
+
+void test_attach_probe(void)
+{
+	const char *kprobe_name = "kprobe/sys_nanosleep";
+	const char *kretprobe_name = "kretprobe/sys_nanosleep";
+	const char *uprobe_name = "uprobe/trigger_func";
+	const char *uretprobe_name = "uretprobe/trigger_func";
+	const int kprobe_idx = 0, kretprobe_idx = 1;
+	const int uprobe_idx = 2, uretprobe_idx = 3;
+	const char *file = "./test_attach_probe.o";
+	struct bpf_program *kprobe_prog, *kretprobe_prog;
+	struct bpf_program *uprobe_prog, *uretprobe_prog;
+	struct bpf_object *obj;
+	int err, prog_fd, duration = 0, res;
+	struct bpf_link *kprobe_link = NULL;
+	struct bpf_link *kretprobe_link = NULL;
+	struct bpf_link *uprobe_link = NULL;
+	struct bpf_link *uretprobe_link = NULL;
+	int results_map_fd;
+	size_t uprobe_offset;
+	ssize_t base_addr;
+
+	base_addr = get_base_addr();
+	if (CHECK(base_addr < 0, "get_base_addr",
+		  "failed to find base addr: %zd", base_addr))
+		return;
+	uprobe_offset = (size_t)&get_base_addr - base_addr;
+
+	/* load programs */
+	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
+	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+		return;
+
+	kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
+	if (CHECK(!kprobe_prog, "find_probe",
+		  "prog '%s' not found\n", kprobe_name))
+		goto cleanup;
+	kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
+	if (CHECK(!kretprobe_prog, "find_probe",
+		  "prog '%s' not found\n", kretprobe_name))
+		goto cleanup;
+	uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name);
+	if (CHECK(!uprobe_prog, "find_probe",
+		  "prog '%s' not found\n", uprobe_name))
+		goto cleanup;
+	uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name);
+	if (CHECK(!uretprobe_prog, "find_probe",
+		  "prog '%s' not found\n", uretprobe_name))
+		goto cleanup;
+
+	/* load maps */
+	results_map_fd = bpf_find_map(__func__, obj, "results_map");
+	if (CHECK(results_map_fd < 0, "find_results_map",
+		  "err %d\n", results_map_fd))
+		goto cleanup;
+
+	kprobe_link = bpf_program__attach_kprobe(kprobe_prog,
+						 false /* retprobe */,
+						 SYS_KPROBE_NAME);
+	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
+		  "err %ld\n", PTR_ERR(kprobe_link))) {
+		kprobe_link = NULL;
+		goto cleanup;
+	}
+	kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog,
+						    true /* retprobe */,
+						    SYS_KPROBE_NAME);
+	if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
+		  "err %ld\n", PTR_ERR(kretprobe_link))) {
+		kretprobe_link = NULL;
+		goto cleanup;
+	}
+	uprobe_link = bpf_program__attach_uprobe(uprobe_prog,
+						 false /* retprobe */,
+						 0 /* self pid */,
+						 "/proc/self/exe",
+						 uprobe_offset);
+	if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
+		  "err %ld\n", PTR_ERR(uprobe_link))) {
+		uprobe_link = NULL;
+		goto cleanup;
+	}
+	uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog,
+						    true /* retprobe */,
+						    -1 /* any pid */,
+						    "/proc/self/exe",
+						    uprobe_offset);
+	if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
+		  "err %ld\n", PTR_ERR(uretprobe_link))) {
+		uretprobe_link = NULL;
+		goto cleanup;
+	}
+
+	/* trigger & validate kprobe && kretprobe */
+	usleep(1);
+
+	err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
+	if (CHECK(err, "get_kprobe_res",
+		  "failed to get kprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != kprobe_idx + 1, "check_kprobe_res",
+		  "wrong kprobe res: %d\n", res))
+		goto cleanup;
+
+	err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res);
+	if (CHECK(err, "get_kretprobe_res",
+		  "failed to get kretprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res",
+		  "wrong kretprobe res: %d\n", res))
+		goto cleanup;
+
+	/* trigger & validate uprobe & uretprobe */
+	get_base_addr();
+
+	err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
+	if (CHECK(err, "get_uprobe_res",
+		  "failed to get uprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != uprobe_idx + 1, "check_uprobe_res",
+		  "wrong uprobe res: %d\n", res))
+		goto cleanup;
+
+	err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res);
+	if (CHECK(err, "get_uretprobe_res",
+		  "failed to get uretprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res",
+		  "wrong uretprobe res: %d\n", res))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(kprobe_link);
+	bpf_link__destroy(kretprobe_link);
+	bpf_link__destroy(uprobe_link);
+	bpf_link__destroy(uretprobe_link);
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index b74e2f6e96d0..e1b55261526f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -5,14 +5,14 @@ static int libbpf_debug_print(enum libbpf_print_level level,
 			      const char *format, va_list args)
 {
 	if (level != LIBBPF_DEBUG)
-		return 0;
+		return vfprintf(stderr, format, args);
 
 	if (!strstr(format, "verifier log"))
 		return 0;
 	return vfprintf(stderr, "%s", args);
 }
 
-static int check_load(const char *file)
+static int check_load(const char *file, enum bpf_prog_type type)
 {
 	struct bpf_prog_load_attr attr;
 	struct bpf_object *obj = NULL;
@@ -20,8 +20,9 @@ static int check_load(const char *file)
 
 	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
 	attr.file = file;
-	attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+	attr.prog_type = type;
 	attr.log_level = 4;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
 	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
 	bpf_object__close(obj);
 	if (err)
@@ -31,19 +32,69 @@ static int check_load(const char *file)
 
 void test_bpf_verif_scale(void)
 {
-	const char *file1 = "./test_verif_scale1.o";
-	const char *file2 = "./test_verif_scale2.o";
-	const char *file3 = "./test_verif_scale3.o";
-	int err;
+	const char *sched_cls[] = {
+		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o",
+	};
+	const char *raw_tp[] = {
+		/* full unroll by llvm */
+		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o",
+
+		/* partial unroll. llvm will unroll loop ~150 times.
+		 * C loop count -> 600.
+		 * Asm loop count -> 4.
+		 * 16k insns in loop body.
+		 * Total of 5 such loops. Total program size ~82k insns.
+		 */
+		"./pyperf600.o",
+
+		/* no unroll at all.
+		 * C loop count -> 600.
+		 * ASM loop count -> 600.
+		 * ~110 insns in loop body.
+		 * Total of 5 such loops. Total program size ~1500 insns.
+		 */
+		"./pyperf600_nounroll.o",
+
+		"./loop1.o", "./loop2.o",
+
+		/* partial unroll. 19k insn in a loop.
+		 * Total program size 20.8k insn.
+		 * ~350k processed_insns
+		 */
+		"./strobemeta.o",
+
+		/* no unroll, tiny loops */
+		"./strobemeta_nounroll1.o",
+		"./strobemeta_nounroll2.o",
+	};
+	const char *cg_sysctl[] = {
+		"./test_sysctl_loop1.o", "./test_sysctl_loop2.o",
+	};
+	int err, i;
 
 	if (verifier_stats)
 		libbpf_set_print(libbpf_debug_print);
 
-	err = check_load(file1);
-	err |= check_load(file2);
-	err |= check_load(file3);
-	if (!err)
-		printf("test_verif_scale:OK\n");
-	else
-		printf("test_verif_scale:FAIL\n");
+	err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT);
+	printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL");
+
+	for (i = 0; i < ARRAY_SIZE(sched_cls); i++) {
+		err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS);
+		printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");
+	}
+
+	for (i = 0; i < ARRAY_SIZE(raw_tp); i++) {
+		err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
+		printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) {
+		err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL);
+		printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK");
+	}
+	err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP);
+	printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
+
+	err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL);
+	printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
new file mode 100644
index 000000000000..3f1ef95865ff
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
+#else
+#define SYS_KPROBE_NAME "sys_nanosleep"
+#endif
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+	int cpu_data = *(int *)data, duration = 0;
+	cpu_set_t *cpu_seen = ctx;
+
+	if (cpu_data != cpu)
+		CHECK(cpu_data != cpu, "check_cpu_data",
+		      "cpu_data %d != cpu %d\n", cpu_data, cpu);
+
+	CPU_SET(cpu, cpu_seen);
+}
+
+void test_perf_buffer(void)
+{
+	int err, prog_fd, nr_cpus, i, duration = 0;
+	const char *prog_name = "kprobe/sys_nanosleep";
+	const char *file = "./test_perf_buffer.o";
+	struct perf_buffer_opts pb_opts = {};
+	struct bpf_map *perf_buf_map;
+	cpu_set_t cpu_set, cpu_seen;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	struct perf_buffer *pb;
+	struct bpf_link *link;
+
+	nr_cpus = libbpf_num_possible_cpus();
+	if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
+		return;
+
+	/* load program */
+	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
+	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+		return;
+
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
+		goto out_close;
+
+	/* load map */
+	perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
+	if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+		goto out_close;
+
+	/* attach kprobe */
+	link = bpf_program__attach_kprobe(prog, false /* retprobe */,
+					  SYS_KPROBE_NAME);
+	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+		goto out_close;
+
+	/* set up perf buffer */
+	pb_opts.sample_cb = on_sample;
+	pb_opts.ctx = &cpu_seen;
+	pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+		goto out_detach;
+
+	/* trigger kprobe on every CPU */
+	CPU_ZERO(&cpu_seen);
+	for (i = 0; i < nr_cpus; i++) {
+		CPU_ZERO(&cpu_set);
+		CPU_SET(i, &cpu_set);
+
+		err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
+					     &cpu_set);
+		if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
+				 i, err))
+			goto out_detach;
+
+		usleep(1);
+	}
+
+	/* read perf buffer */
+	err = perf_buffer__poll(pb, 100);
+	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+		goto out_free_pb;
+
+	if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
+		  "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
+		goto out_free_pb;
+
+out_free_pb:
+	perf_buffer__free(pb);
+out_detach:
+	bpf_link__destroy(link);
+out_close:
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
new file mode 100644
index 000000000000..67cea1686305
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+static volatile int sigusr1_received = 0;
+
+static void sigusr1_handler(int signum)
+{
+	sigusr1_received++;
+}
+
+static int test_send_signal_common(struct perf_event_attr *attr,
+				    int prog_type,
+				    const char *test_name)
+{
+	int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd;
+	const char *file = "./test_send_signal_kern.o";
+	struct bpf_object *obj = NULL;
+	int pipe_c2p[2], pipe_p2c[2];
+	__u32 key = 0, duration = 0;
+	char buf[256];
+	pid_t pid;
+	__u64 val;
+
+	if (CHECK(pipe(pipe_c2p), test_name,
+		  "pipe pipe_c2p error: %s\n", strerror(errno)))
+		goto no_fork_done;
+
+	if (CHECK(pipe(pipe_p2c), test_name,
+		  "pipe pipe_p2c error: %s\n", strerror(errno))) {
+		close(pipe_c2p[0]);
+		close(pipe_c2p[1]);
+		goto no_fork_done;
+	}
+
+	pid = fork();
+	if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+		close(pipe_c2p[0]);
+		close(pipe_c2p[1]);
+		close(pipe_p2c[0]);
+		close(pipe_p2c[1]);
+		goto no_fork_done;
+	}
+
+	if (pid == 0) {
+		/* install signal handler and notify parent */
+		signal(SIGUSR1, sigusr1_handler);
+
+		close(pipe_c2p[0]); /* close read */
+		close(pipe_p2c[1]); /* close write */
+
+		/* notify parent signal handler is installed */
+		write(pipe_c2p[1], buf, 1);
+
+		/* make sure parent enabled bpf program to send_signal */
+		read(pipe_p2c[0], buf, 1);
+
+		/* wait a little for signal handler */
+		sleep(1);
+
+		if (sigusr1_received)
+			write(pipe_c2p[1], "2", 1);
+		else
+			write(pipe_c2p[1], "0", 1);
+
+		/* wait for parent notification and exit */
+		read(pipe_p2c[0], buf, 1);
+
+		close(pipe_c2p[1]);
+		close(pipe_p2c[0]);
+		exit(0);
+	}
+
+	close(pipe_c2p[1]); /* close write */
+	close(pipe_p2c[0]); /* close read */
+
+	err = bpf_prog_load(file, prog_type, &obj, &prog_fd);
+	if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n",
+		  strerror(errno)))
+		goto prog_load_failure;
+
+	pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
+			 -1 /* group id */, 0 /* flags */);
+	if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
+		  strerror(errno))) {
+		err = -1;
+		goto close_prog;
+	}
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n",
+		  strerror(errno)))
+		goto disable_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n",
+		  strerror(errno)))
+		goto disable_pmu;
+
+	err = -1;
+	info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map");
+	if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map"))
+		goto disable_pmu;
+
+	status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map");
+	if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map"))
+		goto disable_pmu;
+
+	/* wait until child signal handler installed */
+	read(pipe_c2p[0], buf, 1);
+
+	/* trigger the bpf send_signal */
+	key = 0;
+	val = (((__u64)(SIGUSR1)) << 32) | pid;
+	bpf_map_update_elem(info_map_fd, &key, &val, 0);
+
+	/* notify child that bpf program can send_signal now */
+	write(pipe_p2c[1], buf, 1);
+
+	/* wait for result */
+	err = read(pipe_c2p[0], buf, 1);
+	if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+		goto disable_pmu;
+	if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+		err = -1;
+		goto disable_pmu;
+	}
+
+	err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
+
+	/* notify child safe to exit */
+	write(pipe_p2c[1], buf, 1);
+
+disable_pmu:
+	close(pmu_fd);
+close_prog:
+	bpf_object__close(obj);
+prog_load_failure:
+	close(pipe_c2p[0]);
+	close(pipe_p2c[1]);
+	wait(NULL);
+no_fork_done:
+	return err;
+}
+
+static int test_send_signal_tracepoint(void)
+{
+	const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_TRACEPOINT,
+		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN,
+		.sample_period = 1,
+		.wakeup_events = 1,
+	};
+	__u32 duration = 0;
+	int bytes, efd;
+	char buf[256];
+
+	efd = open(id_path, O_RDONLY, 0);
+	if (CHECK(efd < 0, "tracepoint",
+		  "open syscalls/sys_enter_nanosleep/id failure: %s\n",
+		  strerror(errno)))
+		return -1;
+
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
+		  "read syscalls/sys_enter_nanosleep/id failure: %s\n",
+		  strerror(errno)))
+		return -1;
+
+	attr.config = strtol(buf, NULL, 0);
+
+	return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
+}
+
+static int test_send_signal_nmi(void)
+{
+	struct perf_event_attr attr = {
+		.sample_freq = 50,
+		.freq = 1,
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+
+	return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event");
+}
+
+void test_send_signal(void)
+{
+	int ret = 0;
+
+	ret |= test_send_signal_tracepoint();
+	ret |= test_send_signal_nmi();
+	if (!ret)
+		printf("test_send_signal:OK\n");
+	else
+		printf("test_send_signal:FAIL\n");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index 3aab2b083c71..ac44fda84833 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -4,11 +4,13 @@
 void test_stacktrace_build_id(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *prog_name = "tracepoint/random/urandom_read";
 	const char *file = "./test_stacktrace_build_id.o";
-	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
-	struct perf_event_attr attr = {};
+	int err, prog_fd, stack_trace_len;
 	__u32 key, previous_key, val, duration = 0;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
+	struct bpf_link *link = NULL;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -18,44 +20,16 @@ void test_stacktrace_build_id(void)
 retry:
 	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
-		goto out;
+		return;
 
-	/* Get the ID for the sched/sched_switch tracepoint */
-	snprintf(buf, sizeof(buf),
-		 "/sys/kernel/debug/tracing/events/random/urandom_read/id");
-	efd = open(buf, O_RDONLY, 0);
-	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
 		goto close_prog;
 
-	bytes = read(efd, buf, sizeof(buf));
-	close(efd);
-	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
-		  "read", "bytes %d errno %d\n", bytes, errno))
+	link = bpf_program__attach_tracepoint(prog, "random", "urandom_read");
+	if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
 		goto close_prog;
 
-	/* Open the perf event and attach bpf progrram */
-	attr.config = strtol(buf, NULL, 0);
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
-	attr.sample_period = 1;
-	attr.wakeup_events = 1;
-	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
-			 0 /* cpu 0 */, -1 /* group id */,
-			 0 /* flags */);
-	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
-		  pmu_fd, errno))
-		goto close_prog;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
-		  err, errno))
-		goto close_pmu;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
-		  err, errno))
-		goto disable_pmu;
-
 	/* find map fds */
 	control_map_fd = bpf_find_map(__func__, obj, "control_map");
 	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
@@ -133,8 +107,7 @@ retry:
 	 * try it one more time.
 	 */
 	if (build_id_matches < 1 && retry--) {
-		ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-		close(pmu_fd);
+		bpf_link__destroy(link);
 		bpf_object__close(obj);
 		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
 		       __func__);
@@ -152,14 +125,8 @@ retry:
 	      "err %d errno %d\n", err, errno);
 
 disable_pmu:
-	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-
-close_pmu:
-	close(pmu_fd);
+	bpf_link__destroy(link);
 
 close_prog:
 	bpf_object__close(obj);
-
-out:
-	return;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 1c1a2f75f3d8..9557b7dfb782 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -17,6 +17,7 @@ static __u64 read_perf_max_sample_freq(void)
 void test_stacktrace_build_id_nmi(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *prog_name = "tracepoint/random/urandom_read";
 	const char *file = "./test_stacktrace_build_id.o";
 	int err, pmu_fd, prog_fd;
 	struct perf_event_attr attr = {
@@ -25,7 +26,9 @@ void test_stacktrace_build_id_nmi(void)
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 	};
 	__u32 key, previous_key, val, duration = 0;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
+	struct bpf_link *link;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -39,6 +42,10 @@ retry:
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
 		return;
 
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
+		goto close_prog;
+
 	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
 			 0 /* cpu 0 */, -1 /* group id */,
 			 0 /* flags */);
@@ -47,15 +54,12 @@ retry:
 		  pmu_fd, errno))
 		goto close_prog;
 
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
-		  err, errno))
-		goto close_pmu;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
-		  err, errno))
-		goto disable_pmu;
+	link = bpf_program__attach_perf_event(prog, pmu_fd);
+	if (CHECK(IS_ERR(link), "attach_perf_event",
+		  "err %ld\n", PTR_ERR(link))) {
+		close(pmu_fd);
+		goto close_prog;
+	}
 
 	/* find map fds */
 	control_map_fd = bpf_find_map(__func__, obj, "control_map");
@@ -134,8 +138,7 @@ retry:
 	 * try it one more time.
 	 */
 	if (build_id_matches < 1 && retry--) {
-		ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-		close(pmu_fd);
+		bpf_link__destroy(link);
 		bpf_object__close(obj);
 		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
 		       __func__);
@@ -154,11 +157,7 @@ retry:
 	 */
 
 disable_pmu:
-	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-
-close_pmu:
-	close(pmu_fd);
-
+	bpf_link__destroy(link);
 close_prog:
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 2bfd50a0d6d1..fc539335c5b3 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -4,50 +4,26 @@
 void test_stacktrace_map(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *prog_name = "tracepoint/sched/sched_switch";
+	int err, prog_fd, stack_trace_len;
 	const char *file = "./test_stacktrace_map.o";
-	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
-	struct perf_event_attr attr = {};
 	__u32 key, val, duration = 0;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
-	char buf[256];
+	struct bpf_link *link;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
 		return;
 
-	/* Get the ID for the sched/sched_switch tracepoint */
-	snprintf(buf, sizeof(buf),
-		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
-	efd = open(buf, O_RDONLY, 0);
-	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
 		goto close_prog;
 
-	bytes = read(efd, buf, sizeof(buf));
-	close(efd);
-	if (bytes <= 0 || bytes >= sizeof(buf))
+	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
+	if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
 		goto close_prog;
 
-	/* Open the perf event and attach bpf progrram */
-	attr.config = strtol(buf, NULL, 0);
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
-	attr.sample_period = 1;
-	attr.wakeup_events = 1;
-	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
-			 0 /* cpu 0 */, -1 /* group id */,
-			 0 /* flags */);
-	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
-		  pmu_fd, errno))
-		goto close_prog;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-	if (err)
-		goto disable_pmu;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-	if (err)
-		goto disable_pmu;
-
 	/* find map fds */
 	control_map_fd = bpf_find_map(__func__, obj, "control_map");
 	if (control_map_fd < 0)
@@ -96,8 +72,7 @@ void test_stacktrace_map(void)
 disable_pmu:
 	error_cnt++;
 disable_pmu_noerr:
-	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-	close(pmu_fd);
+	bpf_link__destroy(link);
 close_prog:
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 1f8387d80fd7..fbfa8e76cf63 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -3,18 +3,25 @@
 
 void test_stacktrace_map_raw_tp(void)
 {
+	const char *prog_name = "tracepoint/sched/sched_switch";
 	int control_map_fd, stackid_hmap_fd, stackmap_fd;
 	const char *file = "./test_stacktrace_map.o";
-	int efd, err, prog_fd;
 	__u32 key, val, duration = 0;
+	int err, prog_fd;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
+	struct bpf_link *link = NULL;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
 		return;
 
-	efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
-	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
+		goto close_prog;
+
+	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
+	if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
 		goto close_prog;
 
 	/* find map fds */
@@ -55,5 +62,7 @@ void test_stacktrace_map_raw_tp(void)
 close_prog:
 	error_cnt++;
 close_prog_noerr:
+	if (!IS_ERR_OR_NULL(link))
+		bpf_link__destroy(link);
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 81ad9a0b29d0..5ae485a6af3f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -57,19 +57,19 @@ struct frag_hdr {
 	__be32 identification;
 };
 
-struct bpf_map_def SEC("maps") jmp_table = {
-	.type = BPF_MAP_TYPE_PROG_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 8
-};
-
-struct bpf_map_def SEC("maps") last_dissection = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_flow_keys),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 8);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct bpf_flow_keys);
+} last_dissection SEC(".maps");
 
 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
 					    int ret)
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
new file mode 100644
index 000000000000..8f44767a75fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper tests for bitfield.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdbool.h>
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct bitfields_only_mixed_types {
+ *	int a: 3;
+ *	long int b: 2;
+ *	_Bool c: 1;
+ *	enum {
+ *		A = 0,
+ *		B = 1,
+ *	} d: 1;
+ *	short e: 5;
+ *	int: 20;
+ *	unsigned int f: 30;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct bitfields_only_mixed_types {
+	int a: 3;
+	long int b: 2;
+	bool c: 1; /* it's really a _Bool type */
+	enum {
+		A, /* A = 0, dumper is very explicit */
+		B, /* B = 1, same */
+	} d: 1;
+	short e: 5;
+	/* 20-bit padding here */
+	unsigned f: 30; /* this gets aligned on 4-byte boundary */
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct bitfield_mixed_with_others {
+ *	char: 4;
+ *	int a: 4;
+ *	short b;
+ *	long int c;
+ *	long int d: 8;
+ *	int e;
+ *	int f;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+struct bitfield_mixed_with_others {
+	long: 4; /* char is enough as a backing field */
+	int a: 4;
+	/* 8-bit implicit padding */
+	short b; /* combined with previous bitfield */
+	/* 4 more bytes of implicit padding */
+	long c;
+	long d: 8;
+	/* 24 bits implicit padding */
+	int e; /* combined with previous bitfield */
+	int f;
+	/* 4 bytes of padding */
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct bitfield_flushed {
+ *	int a: 4;
+ *	long: 60;
+ *	long int b: 16;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+struct bitfield_flushed {
+	int a: 4;
+	long: 0; /* flush until next natural alignment boundary */
+	long b: 16;
+};
+
+int f(struct {
+	struct bitfields_only_mixed_types _1;
+	struct bitfield_mixed_with_others _2;
+	struct bitfield_flushed _3;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
new file mode 100644
index 000000000000..ba97165bdb28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test for multi-dimensional array output.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+typedef int arr_t[2];
+
+typedef int multiarr_t[3][4][5];
+
+typedef int *ptr_arr_t[6];
+
+typedef int *ptr_multiarr_t[7][8][9][10];
+
+typedef int * (*fn_ptr_arr_t[11])();
+
+typedef int * (*fn_ptr_multiarr_t[12][13])();
+
+struct root_struct {
+	arr_t _1;
+	multiarr_t _2;
+	ptr_arr_t _3;
+	ptr_multiarr_t _4;
+	fn_ptr_arr_t _5;
+	fn_ptr_multiarr_t _6;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct root_struct *s)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c
new file mode 100644
index 000000000000..92a4ad428710
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test validating no name versioning happens between
+ * independent C namespaces (struct/union/enum vs typedef/enum values).
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct S {
+	int S;
+	int U;
+};
+
+typedef struct S S;
+
+union U {
+	int S;
+	int U;
+};
+
+typedef union U U;
+
+enum E {
+	V = 0,
+};
+
+typedef enum E E;
+
+struct A {};
+
+union B {};
+
+enum C {
+	A = 1,
+	B = 2,
+	C = 3,
+};
+
+struct X {};
+
+union Y {};
+
+enum Z;
+
+typedef int X;
+
+typedef int Y;
+
+typedef int Z;
+
+/*------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct {
+	struct S _1;
+	S _2;
+	union U _3;
+	U _4;
+	enum E _5;
+	E _6;
+	struct A a;
+	union B b;
+	enum C c;
+	struct X x;
+	union Y y;
+	enum Z *z;
+	X xx;
+	Y yy;
+	Z zz;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c
new file mode 100644
index 000000000000..7c95702ee4cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test for topological sorting of dependent structs.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct s1 {};
+
+struct s3;
+
+struct s4;
+
+struct s2 {
+	struct s2 *s2;
+	struct s3 *s3;
+	struct s4 *s4;
+};
+
+struct s3 {
+	struct s1 s1;
+	struct s2 s2;
+};
+
+struct s4 {
+	struct s1 s1;
+	struct s3 s3;
+};
+
+struct list_head {
+	struct list_head *next;
+	struct list_head *prev;
+};
+
+struct hlist_node {
+	struct hlist_node *next;
+	struct hlist_node **pprev;
+};
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct callback_head {
+	struct callback_head *next;
+	void (*func)(struct callback_head *);
+};
+
+struct root_struct {
+	struct s4 s4;
+	struct list_head l;
+	struct hlist_node n;
+	struct hlist_head h;
+	struct callback_head cb;
+};
+
+/*------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct root_struct *root)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
new file mode 100644
index 000000000000..1cef3bec1dc7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper tests for struct packing determination.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct packed_trailing_space {
+	int a;
+	short b;
+} __attribute__((packed));
+
+struct non_packed_trailing_space {
+	int a;
+	short b;
+};
+
+struct packed_fields {
+	short a;
+	int b;
+} __attribute__((packed));
+
+struct non_packed_fields {
+	short a;
+	int b;
+};
+
+struct nested_packed {
+	char: 4;
+	int a: 4;
+	long int b;
+	struct {
+		char c;
+		int d;
+	} __attribute__((packed)) e;
+} __attribute__((packed));
+
+union union_is_never_packed {
+	int a: 4;
+	char b;
+	char c: 1;
+};
+
+union union_does_not_need_packing {
+	struct {
+		long int a;
+		int b;
+	} __attribute__((packed));
+	int c;
+};
+
+union jump_code_union {
+	char code[5];
+	struct {
+		char jump;
+		int offset;
+	} __attribute__((packed));
+};
+
+/*------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct {
+	struct packed_trailing_space _1;
+	struct non_packed_trailing_space _2;
+	struct packed_fields _3;
+	struct non_packed_fields _4;
+	struct nested_packed _5;
+	union union_is_never_packed _6;
+	union union_does_not_need_packing _7;
+	union jump_code_union _8;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
new file mode 100644
index 000000000000..3a62119c7498
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper tests for implicit and explicit padding between fields and
+ * at the end of a struct.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct padded_implicitly {
+	int a;
+	long int b;
+	char c;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padded_explicitly {
+ *	int a;
+ *	int: 32;
+ *	int b;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct padded_explicitly {
+	int a;
+	int: 1; /* algo will explicitly pad with full 32 bits here */
+	int b;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padded_a_lot {
+ *	int a;
+ *	long: 32;
+ *	long: 64;
+ *	long: 64;
+ *	int b;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct padded_a_lot {
+	int a;
+	/* 32 bit of implicit padding here, which algo will make explicit */
+	long: 64;
+	long: 64;
+	int b;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padded_cache_line {
+ *	int a;
+ *	long: 32;
+ *	long: 64;
+ *	long: 64;
+ *	long: 64;
+ *	int b;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct padded_cache_line {
+	int a;
+	int b __attribute__((aligned(32)));
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct zone_padding {
+ *	char x[0];
+ *};
+ *
+ *struct zone {
+ *	int a;
+ *	short b;
+ *	short: 16;
+ *	struct zone_padding __pad__;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct zone_padding {
+	char x[0];
+} __attribute__((__aligned__(8)));
+
+struct zone {
+	int a;
+	short b;
+	short: 16;
+	struct zone_padding __pad__;
+};
+
+int f(struct {
+	struct padded_implicitly _1;
+	struct padded_explicitly _2;
+	struct padded_a_lot _3;
+	struct padded_cache_line _4;
+	struct zone _5;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
new file mode 100644
index 000000000000..d4a02fe44a12
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test for majority of C syntax quirks.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+enum e1 {
+	A = 0,
+	B = 1,
+};
+
+enum e2 {
+	C = 100,
+	D = -100,
+	E = 0,
+};
+
+typedef enum e2 e2_t;
+
+typedef enum {
+	F = 0,
+	G = 1,
+	H = 2,
+} e3_t;
+
+typedef int int_t;
+
+typedef volatile const int * volatile const crazy_ptr_t;
+
+typedef int *****we_need_to_go_deeper_ptr_t;
+
+typedef volatile const we_need_to_go_deeper_ptr_t * restrict * volatile * const * restrict volatile * restrict const * volatile const * restrict volatile const how_about_this_ptr_t;
+
+typedef int *ptr_arr_t[10];
+
+typedef void (*fn_ptr1_t)(int);
+
+typedef void (*printf_fn_t)(const char *, ...);
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+/*
+ * While previous function pointers are pretty trivial (C-syntax-level
+ * trivial), the following are deciphered here for future generations:
+ *
+ * - `fn_ptr2_t`: function, taking anonymous struct as a first arg and pointer
+ *   to a function, that takes int and returns int, as a second arg; returning
+ *   a pointer to a const pointer to a char. Equivalent to:
+ *	typedef struct { int a; } s_t;
+ *	typedef int (*fn_t)(int);
+ *	typedef char * const * (*fn_ptr2_t)(s_t, fn_t);
+ *
+ * - `fn_complext_t`: pointer to a function returning struct and accepting
+ *   union and struct. All structs and enum are anonymous and defined inline.
+ *
+ * - `signal_t: pointer to a function accepting a pointer to a function as an
+ *   argument and returning pointer to a function as a result. Sane equivalent:
+ *	typedef void (*signal_handler_t)(int);
+ *	typedef signal_handler_t (*signal_ptr_t)(int, signal_handler_t);
+ *
+ * - fn_ptr_arr1_t: array of pointers to a function accepting pointer to
+ *   a pointer to an int and returning pointer to a char. Easy.
+ *
+ * - fn_ptr_arr2_t: array of const pointers to a function taking no arguments
+ *   and returning a const pointer to a function, that takes pointer to a
+ *   `int -> char *` function and returns pointer to a char. Equivalent:
+ *   typedef char * (*fn_input_t)(int);
+ *   typedef char * (*fn_output_outer_t)(fn_input_t);
+ *   typedef const fn_output_outer_t (* fn_output_inner_t)();
+ *   typedef const fn_output_inner_t fn_ptr_arr2_t[5];
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+typedef char * const * (*fn_ptr2_t)(struct {
+	int a;
+}, int (*)(int));
+
+typedef struct {
+	int a;
+	void (*b)(int, struct {
+		int c;
+	}, union {
+		char d;
+		int e[5];
+	});
+} (*fn_complex_t)(union {
+	void *f;
+	char g[16];
+}, struct {
+	int h;
+});
+
+typedef void (* (*signal_t)(int, void (*)(int)))(int);
+
+typedef char * (*fn_ptr_arr1_t[10])(int **);
+
+typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+
+struct struct_w_typedefs {
+	int_t a;
+	crazy_ptr_t b;
+	we_need_to_go_deeper_ptr_t c;
+	how_about_this_ptr_t d;
+	ptr_arr_t e;
+	fn_ptr1_t f;
+	printf_fn_t g;
+	fn_ptr2_t h;
+	fn_complex_t i;
+	signal_t j;
+	fn_ptr_arr1_t k;
+	fn_ptr_arr2_t l;
+};
+
+typedef struct {
+	int x;
+	int y;
+	int z;
+} anon_struct_t;
+
+struct struct_fwd;
+
+typedef struct struct_fwd struct_fwd_t;
+
+typedef struct struct_fwd *struct_fwd_ptr_t;
+
+union union_fwd;
+
+typedef union union_fwd union_fwd_t;
+
+typedef union union_fwd *union_fwd_ptr_t;
+
+struct struct_empty {};
+
+struct struct_simple {
+	int a;
+	char b;
+	const int_t *p;
+	struct struct_empty s;
+	enum e2 e;
+	enum {
+		ANON_VAL1 = 1,
+		ANON_VAL2 = 2,
+	} f;
+	int arr1[13];
+	enum e2 arr2[5];
+};
+
+union union_empty {};
+
+union union_simple {
+	void *ptr;
+	int num;
+	int_t num2;
+	union union_empty u;
+};
+
+struct struct_in_struct {
+	struct struct_simple simple;
+	union union_simple also_simple;
+	struct {
+		int a;
+	} not_so_hard_as_well;
+	union {
+		int b;
+		int c;
+	} anon_union_is_good;
+	struct {
+		int d;
+		int e;
+	};
+	union {
+		int f;
+		int g;
+	};
+};
+
+struct struct_with_embedded_stuff {
+	int a;
+	struct {
+		int b;
+		struct {
+			struct struct_with_embedded_stuff *c;
+			const char *d;
+		} e;
+		union {
+			volatile long int f;
+			void * restrict g;
+		};
+	};
+	union {
+		const int_t *h;
+		void (*i)(char, int, void *);
+	} j;
+	enum {
+		K = 100,
+		L = 200,
+	} m;
+	char n[16];
+	struct {
+		char o;
+		int p;
+		void (*q)(int);
+	} r[5];
+	struct struct_in_struct s[10];
+	int t[11];
+};
+
+struct root_struct {
+	enum e1 _1;
+	enum e2 _2;
+	e2_t _2_1;
+	e3_t _2_2;
+	struct struct_w_typedefs _3;
+	anon_struct_t _7;
+	struct struct_fwd *_8;
+	struct_fwd_t *_9;
+	struct_fwd_ptr_t _10;
+	union union_fwd *_11;
+	union_fwd_t *_12;
+	union_fwd_ptr_t _13;
+	struct struct_with_embedded_stuff _14;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct root_struct *s)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
index 014dba10b8a5..16c54ade6888 100644
--- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
@@ -4,19 +4,19 @@
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") cg_ids = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") pidmap = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} cg_ids SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} pidmap SEC(".maps");
 
 SEC("tracepoint/syscalls/sys_enter_nanosleep")
 int trace(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
new file mode 100644
index 000000000000..dea395af9ea9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/kfree_skb")
+int nested_loops(volatile struct pt_regs* ctx)
+{
+	int i, j, sum = 0, m;
+
+	for (j = 0; j < 300; j++)
+		for (i = 0; i < j; i++) {
+			if (j & 1)
+				m = ctx->rax;
+			else
+				m = j;
+			sum += i * m;
+		}
+
+	return sum;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
new file mode 100644
index 000000000000..0637bd8e8bcf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/consume_skb")
+int while_true(volatile struct pt_regs* ctx)
+{
+	int i = 0;
+
+	while (true) {
+		if (ctx->rax & 1)
+			i += 3;
+		else
+			i += 7;
+		if (i > 40)
+			break;
+	}
+
+	return i;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
new file mode 100644
index 000000000000..30a0f6cba080
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/consume_skb")
+int while_true(volatile struct pt_regs* ctx)
+{
+	__u64 i = 0, sum = 0;
+	do {
+		i++;
+		sum += ctx->rax;
+	} while (i < 0x100000000ULL);
+	return sum;
+}
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index 9f741e69cebe..38a997852cad 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -10,23 +10,17 @@
 #define REFRESH_TIME_NS	100000000
 #define NS_PER_SEC	1000000000
 
-struct bpf_map_def SEC("maps") percpu_netcnt = {
-	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
-	.key_size = sizeof(struct bpf_cgroup_storage_key),
-	.value_size = sizeof(struct percpu_net_cnt),
-};
-
-BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key,
-		     struct percpu_net_cnt);
-
-struct bpf_map_def SEC("maps") netcnt = {
-	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
-	.key_size = sizeof(struct bpf_cgroup_storage_key),
-	.value_size = sizeof(struct net_cnt),
-};
-
-BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key,
-		     struct net_cnt);
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct percpu_net_cnt);
+} percpu_netcnt SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct net_cnt);
+} netcnt SEC(".maps");
 
 SEC("cgroup/skb")
 int bpf_nextcnt(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
new file mode 100644
index 000000000000..003fe106fc70
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define FUNCTION_NAME_LEN 64
+#define FILE_NAME_LEN 128
+#define TASK_COMM_LEN 16
+
+typedef struct {
+	int PyThreadState_frame;
+	int PyThreadState_thread;
+	int PyFrameObject_back;
+	int PyFrameObject_code;
+	int PyFrameObject_lineno;
+	int PyCodeObject_filename;
+	int PyCodeObject_name;
+	int String_data;
+	int String_size;
+} OffsetConfig;
+
+typedef struct {
+	uintptr_t current_state_addr;
+	uintptr_t tls_key_addr;
+	OffsetConfig offsets;
+	bool use_tls;
+} PidData;
+
+typedef struct {
+	uint32_t success;
+} Stats;
+
+typedef struct {
+	char name[FUNCTION_NAME_LEN];
+	char file[FILE_NAME_LEN];
+} Symbol;
+
+typedef struct {
+	uint32_t pid;
+	uint32_t tid;
+	char comm[TASK_COMM_LEN];
+	int32_t kernel_stack_id;
+	int32_t user_stack_id;
+	bool thread_current;
+	bool pthread_match;
+	bool stack_complete;
+	int16_t stack_len;
+	int32_t stack[STACK_MAX_LEN];
+
+	int has_meta;
+	int metadata;
+	char dummy_safeguard;
+} Event;
+
+
+typedef int pid_t;
+
+typedef struct {
+	void* f_back; // PyFrameObject.f_back, previous frame
+	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
+	void* co_filename; // PyCodeObject.co_filename
+	void* co_name; // PyCodeObject.co_name
+} FrameData;
+
+static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
+{
+	void* thread_state;
+	int key;
+
+	bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
+	bpf_probe_read(&thread_state, sizeof(thread_state),
+		       tls_base + 0x310 + key * 0x10 + 0x08);
+	return thread_state;
+}
+
+static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
+					   FrameData *frame, Symbol *symbol)
+{
+	// read data from PyFrameObject
+	bpf_probe_read(&frame->f_back,
+		       sizeof(frame->f_back),
+		       frame_ptr + pidData->offsets.PyFrameObject_back);
+	bpf_probe_read(&frame->f_code,
+		       sizeof(frame->f_code),
+		       frame_ptr + pidData->offsets.PyFrameObject_code);
+
+	// read data from PyCodeObject
+	if (!frame->f_code)
+		return false;
+	bpf_probe_read(&frame->co_filename,
+		       sizeof(frame->co_filename),
+		       frame->f_code + pidData->offsets.PyCodeObject_filename);
+	bpf_probe_read(&frame->co_name,
+		       sizeof(frame->co_name),
+		       frame->f_code + pidData->offsets.PyCodeObject_name);
+	// read actual names into symbol
+	if (frame->co_filename)
+		bpf_probe_read_str(&symbol->file,
+				   sizeof(symbol->file),
+				   frame->co_filename + pidData->offsets.String_data);
+	if (frame->co_name)
+		bpf_probe_read_str(&symbol->name,
+				   sizeof(symbol->name),
+				   frame->co_name + pidData->offsets.String_data);
+	return true;
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, PidData);
+} pidmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, Event);
+} eventmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, Symbol);
+	__type(value, int);
+} symbolmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, Stats);
+} statsmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 32);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} perfmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 1000);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(long long) * 127);
+} stackmap SEC(".maps");
+
+static __always_inline int __on_event(struct pt_regs *ctx)
+{
+	uint64_t pid_tgid = bpf_get_current_pid_tgid();
+	pid_t pid = (pid_t)(pid_tgid >> 32);
+	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
+	if (!pidData)
+		return 0;
+
+	int zero = 0;
+	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
+	if (!event)
+		return 0;
+
+	event->pid = pid;
+
+	event->tid = (pid_t)pid_tgid;
+	bpf_get_current_comm(&event->comm, sizeof(event->comm));
+
+	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
+
+	void* thread_state_current = (void*)0;
+	bpf_probe_read(&thread_state_current,
+		       sizeof(thread_state_current),
+		       (void*)(long)pidData->current_state_addr);
+
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+	void* tls_base = (void*)task;
+
+	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
+		: thread_state_current;
+	event->thread_current = thread_state == thread_state_current;
+
+	if (pidData->use_tls) {
+		uint64_t pthread_created;
+		uint64_t pthread_self;
+		bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
+
+		bpf_probe_read(&pthread_created,
+			       sizeof(pthread_created),
+			       thread_state + pidData->offsets.PyThreadState_thread);
+		event->pthread_match = pthread_created == pthread_self;
+	} else {
+		event->pthread_match = 1;
+	}
+
+	if (event->pthread_match || !pidData->use_tls) {
+		void* frame_ptr;
+		FrameData frame;
+		Symbol sym = {};
+		int cur_cpu = bpf_get_smp_processor_id();
+
+		bpf_probe_read(&frame_ptr,
+			       sizeof(frame_ptr),
+			       thread_state + pidData->offsets.PyThreadState_frame);
+
+		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
+		if (symbol_counter == NULL)
+			return 0;
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma clang loop unroll(full)
+#endif
+		/* Unwind python stack */
+		for (int i = 0; i < STACK_MAX_LEN; ++i) {
+			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
+				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+				if (!symbol_id) {
+					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
+					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+					if (!symbol_id)
+						return 0;
+				}
+				if (*symbol_id == new_symbol_id)
+					(*symbol_counter)++;
+				event->stack[i] = *symbol_id;
+				event->stack_len = i + 1;
+				frame_ptr = frame.f_back;
+			}
+		}
+		event->stack_complete = frame_ptr == NULL;
+	} else {
+		event->stack_complete = 1;
+	}
+
+	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
+	if (stats)
+		stats->success++;
+
+	event->has_meta = 0;
+	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
+	return 0;
+}
+
+SEC("raw_tracepoint/kfree_skb")
+int on_event(struct pt_regs* ctx)
+{
+	int i, ret = 0;
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf100.c b/tools/testing/selftests/bpf/progs/pyperf100.c
new file mode 100644
index 000000000000..29786325db54
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf100.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 100
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
new file mode 100644
index 000000000000..c39f559d3100
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 180
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf50.c b/tools/testing/selftests/bpf/progs/pyperf50.c
new file mode 100644
index 000000000000..ef7ce340a292
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf50.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 50
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
new file mode 100644
index 000000000000..cb49b89e37cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 600
+/* clang will not unroll the loop 600 times.
+ * Instead it will unroll it to the amount it deemed
+ * appropriate, but the loop will still execute 600 times.
+ * Total program size is around 90k insns
+ */
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
new file mode 100644
index 000000000000..6beff7502f4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 600
+#define NO_UNROLL
+/* clang will not unroll at all.
+ * Total program size is around 2k insns
+ */
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
index 9ff8ac4b0bf6..e4440fdd94cb 100644
--- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
@@ -7,25 +7,33 @@
 #include "bpf_helpers.h"
 #include "bpf_endian.h"
 
-struct bpf_map_def SEC("maps") socket_cookies = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u64),
-	.value_size = sizeof(__u32),
-	.max_entries = 1 << 8,
+struct socket_cookie {
+	__u64 cookie_key;
+	__u32 cookie_value;
 };
 
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct socket_cookie);
+} socket_cookies SEC(".maps");
+
 SEC("cgroup/connect6")
 int set_cookie(struct bpf_sock_addr *ctx)
 {
-	__u32 cookie_value = 0xFF;
-	__u64 cookie_key;
+	struct socket_cookie *p;
 
 	if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
 		return 1;
 
-	cookie_key = bpf_get_socket_cookie(ctx);
-	if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
-		return 0;
+	p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0,
+			       BPF_SK_STORAGE_GET_F_CREATE);
+	if (!p)
+		return 1;
+
+	p->cookie_value = 0xFF;
+	p->cookie_key = bpf_get_socket_cookie(ctx);
 
 	return 1;
 }
@@ -33,9 +41,8 @@ int set_cookie(struct bpf_sock_addr *ctx)
 SEC("sockops")
 int update_cookie(struct bpf_sock_ops *ctx)
 {
-	__u32 new_cookie_value;
-	__u32 *cookie_value;
-	__u64 cookie_key;
+	struct bpf_sock *sk;
+	struct socket_cookie *p;
 
 	if (ctx->family != AF_INET6)
 		return 1;
@@ -43,14 +50,17 @@ int update_cookie(struct bpf_sock_ops *ctx)
 	if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
 		return 1;
 
-	cookie_key = bpf_get_socket_cookie(ctx);
+	if (!ctx->sk)
+		return 1;
+
+	p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0);
+	if (!p)
+		return 1;
 
-	cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
-	if (!cookie_value)
+	if (p->cookie_key != bpf_get_socket_cookie(ctx))
 		return 1;
 
-	new_cookie_value = (ctx->local_port << 8) | *cookie_value;
-	bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
+	p->cookie_value = (ctx->local_port << 8) | p->cookie_value;
 
 	return 1;
 }
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index 0f92858f6226..9390e0244259 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -1,17 +1,9 @@
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 #include "bpf_endian.h"
 
 int _version SEC("version") = 1;
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index 12a7b5c82ed6..e80484d98a1a 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -1,17 +1,10 @@
 #include <linux/bpf.h>
+
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 #include "bpf_endian.h"
 
 int _version SEC("version") = 1;
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sk_msg1")
 int bpf_prog1(struct sk_msg_md *msg)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index 2ce7634a4012..433e23918a62 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -1,44 +1,36 @@
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 #include "bpf_endian.h"
 
 int _version SEC("version") = 1;
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_rx SEC(".maps");
 
-struct bpf_map_def SEC("maps") sock_map_rx = {
-	.type = BPF_MAP_TYPE_SOCKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_tx SEC(".maps");
 
-struct bpf_map_def SEC("maps") sock_map_tx = {
-	.type = BPF_MAP_TYPE_SOCKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_msg SEC(".maps");
 
-struct bpf_map_def SEC("maps") sock_map_msg = {
-	.type = BPF_MAP_TYPE_SOCKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_break = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_break SEC(".maps");
 
 SEC("sk_skb2")
 int bpf_prog2(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
new file mode 100644
index 000000000000..4afd2595c08e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+SEC("cgroup/getsockopt/child")
+int _getsockopt_child(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+		return 1;
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	if (optval[0] != 0x80)
+		return 0; /* EPERM, unexpected optval from the kernel */
+
+	ctx->retval = 0; /* Reset system call return value to zero */
+
+	optval[0] = 0x90;
+	ctx->optlen = 1;
+
+	return 1;
+}
+
+SEC("cgroup/getsockopt/parent")
+int _getsockopt_parent(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+		return 1;
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	if (optval[0] != 0x90)
+		return 0; /* EPERM, unexpected optval from the kernel */
+
+	ctx->retval = 0; /* Reset system call return value to zero */
+
+	optval[0] = 0xA0;
+	ctx->optlen = 1;
+
+	return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+		return 1;
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	optval[0] += 0x10;
+	ctx->optlen = 1;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
new file mode 100644
index 000000000000..076122c898e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+#define SOL_CUSTOM			0xdeadbeef
+
+struct sockopt_sk {
+	__u8 val;
+};
+
+struct bpf_map_def SEC("maps") socket_storage_map = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct sockopt_sk),
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk);
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+	struct sockopt_sk *storage;
+
+	if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+		/* Not interested in SOL_IP:IP_TOS;
+		 * let next BPF program in the cgroup chain or kernel
+		 * handle it.
+		 */
+		return 1;
+
+	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
+		/* Not interested in SOL_SOCKET:SO_SNDBUF;
+		 * let next BPF program in the cgroup chain or kernel
+		 * handle it.
+		 */
+		return 1;
+	}
+
+	if (ctx->level != SOL_CUSTOM)
+		return 0; /* EPERM, deny everything except custom level */
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0; /* EPERM, couldn't get sk storage */
+
+	if (!ctx->retval)
+		return 0; /* EPERM, kernel should not have handled
+			   * SOL_CUSTOM, something is wrong!
+			   */
+	ctx->retval = 0; /* Reset system call return value to zero */
+
+	optval[0] = storage->val;
+	ctx->optlen = 1;
+
+	return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+	struct sockopt_sk *storage;
+
+	if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+		/* Not interested in SOL_IP:IP_TOS;
+		 * let next BPF program in the cgroup chain or kernel
+		 * handle it.
+		 */
+		return 1;
+
+	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
+		/* Overwrite SO_SNDBUF value */
+
+		if (optval + sizeof(__u32) > optval_end)
+			return 0; /* EPERM, bounds check */
+
+		*(__u32 *)optval = 0x55AA;
+		ctx->optlen = 4;
+
+		return 1;
+	}
+
+	if (ctx->level != SOL_CUSTOM)
+		return 0; /* EPERM, deny everything except custom level */
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0; /* EPERM, couldn't get sk storage */
+
+	storage->val = optval[0];
+	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
+			   * setsockopt handler.
+			   */
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.c b/tools/testing/selftests/bpf/progs/strobemeta.c
new file mode 100644
index 000000000000..d3df3d86f092
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+/* full unroll by llvm #undef NO_UNROLL */
+#include "strobemeta.h"
+
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
new file mode 100644
index 000000000000..8a399bdfd920
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include "bpf_helpers.h"
+
+typedef uint32_t pid_t;
+struct task_struct {};
+
+#define TASK_COMM_LEN 16
+#define PERF_MAX_STACK_DEPTH 127
+
+#define STROBE_TYPE_INVALID 0
+#define STROBE_TYPE_INT 1
+#define STROBE_TYPE_STR 2
+#define STROBE_TYPE_MAP 3
+
+#define STACK_TABLE_EPOCH_SHIFT 20
+#define STROBE_MAX_STR_LEN 1
+#define STROBE_MAX_CFGS 32
+#define STROBE_MAX_PAYLOAD						\
+	(STROBE_MAX_STRS * STROBE_MAX_STR_LEN +				\
+	STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+
+struct strobe_value_header {
+	/*
+	 * meaning depends on type:
+	 * 1. int: 0, if value not set, 1 otherwise
+	 * 2. str: 1 always, whether value is set or not is determined by ptr
+	 * 3. map: 1 always, pointer points to additional struct with number
+	 *    of entries (up to STROBE_MAX_MAP_ENTRIES)
+	 */
+	uint16_t len;
+	/*
+	 * _reserved might be used for some future fields/flags, but we always
+	 * want to keep strobe_value_header to be 8 bytes, so BPF can read 16
+	 * bytes in one go and get both header and value
+	 */
+	uint8_t _reserved[6];
+};
+
+/*
+ * strobe_value_generic is used from BPF probe only, but needs to be a union
+ * of strobe_value_int/strobe_value_str/strobe_value_map
+ */
+struct strobe_value_generic {
+	struct strobe_value_header header;
+	union {
+		int64_t val;
+		void *ptr;
+	};
+};
+
+struct strobe_value_int {
+	struct strobe_value_header header;
+	int64_t value;
+};
+
+struct strobe_value_str {
+	struct strobe_value_header header;
+	const char* value;
+};
+
+struct strobe_value_map {
+	struct strobe_value_header header;
+	const struct strobe_map_raw* value;
+};
+
+struct strobe_map_entry {
+	const char* key;
+	const char* val;
+};
+
+/*
+ * Map of C-string key/value pairs with fixed maximum capacity. Each map has
+ * corresponding int64 ID, which application can use (or ignore) in whatever
+ * way appropriate. Map is "write-only", there is no way to get data out of
+ * map. Map is intended to be used to provide metadata for profilers and is
+ * not to be used for internal in-app communication. All methods are
+ * thread-safe.
+ */
+struct strobe_map_raw {
+	/*
+	 * general purpose unique ID that's up to application to decide
+	 * whether and how to use; for request metadata use case id is unique
+	 * request ID that's used to match metadata with stack traces on
+	 * Strobelight backend side
+	 */
+	int64_t id;
+	/* number of used entries in map */
+	int64_t cnt;
+	/*
+	 * having volatile doesn't change anything on BPF side, but clang
+	 * emits warnings for passing `volatile const char *` into
+	 * bpf_probe_read_str that expects just `const char *`
+	 */
+	const char* tag;
+	/*
+	 * key/value entries, each consisting of 2 pointers to key and value
+	 * C strings
+	 */
+	struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
+};
+
+/* Following values define supported values of TLS mode */
+#define TLS_NOT_SET -1
+#define TLS_LOCAL_EXEC 0
+#define TLS_IMM_EXEC 1
+#define TLS_GENERAL_DYN 2
+
+/*
+ * structure that universally represents TLS location (both for static
+ * executables and shared libraries)
+ */
+struct strobe_value_loc {
+	/*
+	 * tls_mode defines what TLS mode was used for particular metavariable:
+	 * - -1 (TLS_NOT_SET) - no metavariable;
+	 * - 0 (TLS_LOCAL_EXEC) - Local Executable mode;
+	 * - 1 (TLS_IMM_EXEC) - Immediate Executable mode;
+	 * - 2 (TLS_GENERAL_DYN) - General Dynamic mode;
+	 * Local Dynamic mode is not yet supported, because never seen in
+	 * practice.  Mode defines how offset field is interpreted. See
+	 * calc_location() in below for details.
+	 */
+	int64_t tls_mode;
+	/*
+	 * TLS_LOCAL_EXEC: offset from thread pointer (fs:0 for x86-64,
+	 * tpidr_el0 for aarch64).
+	 * TLS_IMM_EXEC: absolute address of GOT entry containing offset
+	 * from thread pointer;
+	 * TLS_GENERAL_DYN: absolute addres of double GOT entry
+	 * containing tls_index_t struct;
+	 */
+	int64_t offset;
+};
+
+struct strobemeta_cfg {
+	int64_t req_meta_idx;
+	struct strobe_value_loc int_locs[STROBE_MAX_INTS];
+	struct strobe_value_loc str_locs[STROBE_MAX_STRS];
+	struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
+};
+
+struct strobe_map_descr {
+	uint64_t id;
+	int16_t tag_len;
+	/*
+	 * cnt <0 - map value isn't set;
+	 * 0 - map has id set, but no key/value entries
+	 */
+	int16_t cnt;
+	/*
+	 * both key_lens[i] and val_lens[i] should be >0 for present key/value
+	 * entry
+	 */
+	uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
+	uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
+};
+
+struct strobemeta_payload {
+	/* req_id has valid request ID, if req_meta_valid == 1 */
+	int64_t req_id;
+	uint8_t req_meta_valid;
+	/*
+	 * mask has Nth bit set to 1, if Nth metavar was present and
+	 * successfully read
+	 */
+	uint64_t int_vals_set_mask;
+	int64_t int_vals[STROBE_MAX_INTS];
+	/* len is >0 for present values */
+	uint16_t str_lens[STROBE_MAX_STRS];
+	/* if map_descrs[i].cnt == -1, metavar is not present/set */
+	struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
+	/*
+	 * payload has compactly packed values of str and map variables in the
+	 * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
+	 * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
+	 * value length
+	 */
+	char payload[STROBE_MAX_PAYLOAD];
+};
+
+struct strobelight_bpf_sample {
+	uint64_t ktime;
+	char comm[TASK_COMM_LEN];
+	pid_t pid;
+	int user_stack_id;
+	int kernel_stack_id;
+	int has_meta;
+	struct strobemeta_payload metadata;
+	/*
+	 * makes it possible to pass (<real payload size> + 1) as data size to
+	 * perf_submit() to avoid perf_submit's paranoia about passing zero as
+	 * size, as it deduces that <real payload size> might be
+	 * **theoretically** zero
+	 */
+	char dummy_safeguard;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 32);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} samples SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
+} stacks_0 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
+} stacks_1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, uint32_t);
+	__type(value, struct strobelight_bpf_sample);
+} sample_heap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, STROBE_MAX_CFGS);
+	__type(key, pid_t);
+	__type(value, struct strobemeta_cfg);
+} strobemeta_cfgs SEC(".maps");
+
+/* Type for the dtv.  */
+/* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
+typedef union dtv {
+	size_t counter;
+	struct {
+		void* val;
+		bool is_static;
+	} pointer;
+} dtv_t;
+
+/* Partial definition for tcbhead_t */
+/* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
+struct tcbhead {
+	void* tcb;
+	dtv_t* dtv;
+};
+
+/*
+ * TLS module/offset information for shared library case.
+ * For x86-64, this is mapped onto two entries in GOT.
+ * For aarch64, this is pointed to by second GOT entry.
+ */
+struct tls_index {
+	uint64_t module;
+	uint64_t offset;
+};
+
+static __always_inline void *calc_location(struct strobe_value_loc *loc,
+					   void *tls_base)
+{
+	/*
+	 * tls_mode value is:
+	 * - -1 (TLS_NOT_SET), if no metavar is present;
+	 * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
+	 * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
+	 * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
+	 * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
+	 * This schema allows to use something like:
+	 * (tls_mode + 1) * (tls_base + offset)
+	 * to get NULL for "no metavar" location, or correct pointer for local
+	 * executable mode without doing extra ifs.
+	 */
+	if (loc->tls_mode <= TLS_LOCAL_EXEC) {
+		/* static executable is simple, we just have offset from
+		 * tls_base */
+		void *addr = tls_base + loc->offset;
+		/* multiply by (tls_mode + 1) to get NULL, if we have no
+		 * metavar in this slot */
+		return (void *)((loc->tls_mode + 1) * (int64_t)addr);
+	}
+	/*
+	 * Other modes are more complicated, we need to jump through few hoops.
+	 *
+	 * For immediate executable mode (currently supported only for aarch64):
+	 *  - loc->offset is pointing to a GOT entry containing fixed offset
+	 *  relative to tls_base;
+	 *
+	 * For general dynamic mode:
+	 *  - loc->offset is pointing to a beginning of double GOT entries;
+	 *  - (for aarch64 only) second entry points to tls_index_t struct;
+	 *  - (for x86-64 only) two GOT entries are already tls_index_t;
+	 *  - tls_index_t->module is used to find start of TLS section in
+	 *  which variable resides;
+	 *  - tls_index_t->offset provides offset within that TLS section,
+	 *  pointing to value of variable.
+	 */
+	struct tls_index tls_index;
+	dtv_t *dtv;
+	void *tls_ptr;
+
+	bpf_probe_read(&tls_index, sizeof(struct tls_index),
+		       (void *)loc->offset);
+	/* valid module index is always positive */
+	if (tls_index.module > 0) {
+		/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
+		bpf_probe_read(&dtv, sizeof(dtv),
+			       &((struct tcbhead *)tls_base)->dtv);
+		dtv += tls_index.module;
+	} else {
+		dtv = NULL;
+	}
+	bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
+	/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
+	return tls_ptr && tls_ptr != (void *)-1
+		? tls_ptr + tls_index.offset
+		: NULL;
+}
+
+static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
+					 size_t idx, void *tls_base,
+					 struct strobe_value_generic *value,
+					 struct strobemeta_payload *data)
+{
+	void *location = calc_location(&cfg->int_locs[idx], tls_base);
+	if (!location)
+		return;
+
+	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+	data->int_vals[idx] = value->val;
+	if (value->header.len)
+		data->int_vals_set_mask |= (1 << idx);
+}
+
+static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
+					     size_t idx, void *tls_base,
+					     struct strobe_value_generic *value,
+					     struct strobemeta_payload *data,
+					     void *payload)
+{
+	void *location;
+	uint32_t len;
+
+	data->str_lens[idx] = 0;
+	location = calc_location(&cfg->str_locs[idx], tls_base);
+	if (!location)
+		return 0;
+
+	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+	/*
+	 * if bpf_probe_read_str returns error (<0), due to casting to
+	 * unsinged int, it will become big number, so next check is
+	 * sufficient to check for errors AND prove to BPF verifier, that
+	 * bpf_probe_read_str won't return anything bigger than
+	 * STROBE_MAX_STR_LEN
+	 */
+	if (len > STROBE_MAX_STR_LEN)
+		return 0;
+
+	data->str_lens[idx] = len;
+	return len;
+}
+
+static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
+					  size_t idx, void *tls_base,
+					  struct strobe_value_generic *value,
+					  struct strobemeta_payload *data,
+					  void *payload)
+{
+	struct strobe_map_descr* descr = &data->map_descrs[idx];
+	struct strobe_map_raw map;
+	void *location;
+	uint32_t len;
+	int i;
+
+	descr->tag_len = 0; /* presume no tag is set */
+	descr->cnt = -1; /* presume no value is set */
+
+	location = calc_location(&cfg->map_locs[idx], tls_base);
+	if (!location)
+		return payload;
+
+	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+	if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
+		return payload;
+
+	descr->id = map.id;
+	descr->cnt = map.cnt;
+	if (cfg->req_meta_idx == idx) {
+		data->req_id = map.id;
+		data->req_meta_valid = 1;
+	}
+
+	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
+	if (len <= STROBE_MAX_STR_LEN) {
+		descr->tag_len = len;
+		payload += len;
+	}
+
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) {
+		descr->key_lens[i] = 0;
+		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
+					 map.entries[i].key);
+		if (len <= STROBE_MAX_STR_LEN) {
+			descr->key_lens[i] = len;
+			payload += len;
+		}
+		descr->val_lens[i] = 0;
+		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
+					 map.entries[i].val);
+		if (len <= STROBE_MAX_STR_LEN) {
+			descr->val_lens[i] = len;
+			payload += len;
+		}
+	}
+
+	return payload;
+}
+
+/*
+ * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
+ * pointer to *right after* payload ends
+ */
+static __always_inline void *read_strobe_meta(struct task_struct *task,
+					      struct strobemeta_payload *data)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+	struct strobe_value_generic value = {0};
+	struct strobemeta_cfg *cfg;
+	void *tls_base, *payload;
+
+	cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
+	if (!cfg)
+		return NULL;
+
+	data->int_vals_set_mask = 0;
+	data->req_meta_valid = 0;
+	payload = data->payload;
+	/*
+	 * we don't have struct task_struct definition, it should be:
+	 * tls_base = (void *)task->thread.fsbase;
+	 */
+	tls_base = (void *)task;
+
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_INTS; ++i) {
+		read_int_var(cfg, i, tls_base, &value, data);
+	}
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
+		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+	}
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
+		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+	}
+	/*
+	 * return pointer right after end of payload, so it's possible to
+	 * calculate exact amount of useful data that needs to be sent
+	 */
+	return payload;
+}
+
+SEC("raw_tracepoint/kfree_skb")
+int on_event(struct pt_regs *ctx) {
+	pid_t pid =  bpf_get_current_pid_tgid() >> 32;
+	struct strobelight_bpf_sample* sample;
+	struct task_struct *task;
+	uint32_t zero = 0;
+	uint64_t ktime_ns;
+	void *sample_end;
+
+	sample = bpf_map_lookup_elem(&sample_heap, &zero);
+	if (!sample)
+		return 0; /* this will never happen */
+
+	sample->pid = pid;
+	bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
+	ktime_ns = bpf_ktime_get_ns();
+	sample->ktime = ktime_ns;
+
+	task = (struct task_struct *)bpf_get_current_task();
+	sample_end = read_strobe_meta(task, &sample->metadata);
+	sample->has_meta = sample_end != NULL;
+	sample_end = sample_end ? : &sample->metadata;
+
+	if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
+		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
+		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
+	} else {
+		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
+		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
+	}
+
+	uint64_t sample_size = sample_end - (void *)sample;
+	/* should always be true */
+	if (sample_size < sizeof(struct strobelight_bpf_sample))
+		bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
new file mode 100644
index 000000000000..f0a1669e11d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
new file mode 100644
index 000000000000..4291a7d642e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 30
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
new file mode 100644
index 000000000000..233bdcb1659e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+struct tcp_rtt_storage {
+	__u32 invoked;
+	__u32 dsack_dups;
+	__u32 delivered;
+	__u32 delivered_ce;
+	__u32 icsk_retransmits;
+};
+
+struct bpf_map_def SEC("maps") socket_storage_map = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct tcp_rtt_storage),
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage);
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+	struct tcp_rtt_storage *storage;
+	struct bpf_tcp_sock *tcp_sk;
+	int op = (int) ctx->op;
+	struct bpf_sock *sk;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 1;
+
+	storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 1;
+
+	if (op == BPF_SOCK_OPS_TCP_CONNECT_CB) {
+		bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+		return 1;
+	}
+
+	if (op != BPF_SOCK_OPS_RTT_CB)
+		return 1;
+
+	tcp_sk = bpf_tcp_sock(sk);
+	if (!tcp_sk)
+		return 1;
+
+	storage->invoked++;
+
+	storage->dsack_dups = tcp_sk->dsack_dups;
+	storage->delivered = tcp_sk->delivered;
+	storage->delivered_ce = tcp_sk->delivered_ce;
+	storage->icsk_retransmits = tcp_sk->icsk_retransmits;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
new file mode 100644
index 000000000000..63a8dfef893b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, int);
+	__type(value, int);
+} results_map SEC(".maps");
+
+SEC("kprobe/sys_nanosleep")
+int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+{
+	const int key = 0, value = 1;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+SEC("kretprobe/sys_nanosleep")
+int handle_sys_getpid_return(struct pt_regs *ctx)
+{
+	const int key = 1, value = 2;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+SEC("uprobe/trigger_func")
+int handle_uprobe_entry(struct pt_regs *ctx)
+{
+	const int key = 2, value = 3;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+SEC("uretprobe/trigger_func")
+int handle_uprobe_return(struct pt_regs *ctx)
+{
+	const int key = 3, value = 4;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
new file mode 100644
index 000000000000..5ee3622ddebb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+/* just to validate we can handle maps in multiple sections */
+struct bpf_map_def SEC("maps") btf_map_legacy = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(long long),
+	.max_entries = 4,
+};
+
+BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, int);
+	__type(value, struct ipv_counts);
+} btf_map SEC(".maps");
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	/* just verify we can reference both maps */
+	counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
+	if (!counts)
+		return 0;
+
+	return 0;
+}
+
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_2(arg);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_1(arg);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index f6d9f238e00a..d06b47a09097 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -15,19 +15,19 @@ struct stack_trace_t {
 	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
 };
 
-struct bpf_map_def SEC("maps") perfmap = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(__u32),
-	.max_entries = 2,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 2);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(__u32));
+} perfmap SEC(".maps");
 
-struct bpf_map_def SEC("maps") stackdata_map = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct stack_trace_t),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct stack_trace_t);
+} stackdata_map SEC(".maps");
 
 /* Allocate per-cpu space twice the needed. For the code below
  *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
@@ -47,12 +47,12 @@ struct bpf_map_def SEC("maps") stackdata_map = {
  * issue and avoid complicated C programming massaging.
  * This is an acceptable workaround since there is one entry here.
  */
-struct bpf_map_def SEC("maps") rawdata_map = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__u64 (*value)[2 * MAX_STACK_RAWTP];
+} rawdata_map SEC(".maps");
 
 SEC("tracepoint/raw_syscalls/sys_enter")
 int bpf_prog1(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 5ab14e941980..32a6073acb99 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -7,19 +7,19 @@
 
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") result_number = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(__u32),
-	.value_size	= sizeof(__u64),
-	.max_entries	= 11,
-};
-
-struct bpf_map_def SEC("maps") result_string = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(__u32),
-	.value_size	= 32,
-	.max_entries	= 5,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 11);
+	__type(key, __u32);
+	__type(value, __u64);
+} result_number SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 5);
+	__type(key, __u32);
+	const char (*value)[32];
+} result_string SEC(".maps");
 
 struct foo {
 	__u8  a;
@@ -27,12 +27,12 @@ struct foo {
 	__u64 c;
 };
 
-struct bpf_map_def SEC("maps") result_struct = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(__u32),
-	.value_size	= sizeof(struct foo),
-	.max_entries	= 5,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 5);
+	__type(key, __u32);
+	__type(value, struct foo);
+} result_struct SEC(".maps");
 
 /* Relocation tests for __u64s. */
 static       __u64 num0;
diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h
index 3d12c11a8d47..c300734d26f6 100644
--- a/tools/testing/selftests/bpf/progs/test_jhash.h
+++ b/tools/testing/selftests/bpf/progs/test_jhash.h
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
+#include <features.h>
 
 typedef unsigned int u32;
 
-static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift)
+static __always_inline u32 rol32(u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
 }
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 1e10c9590991..1d652ee8e73d 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -169,40 +169,40 @@ struct eth_hdr {
 	unsigned short eth_proto;
 };
 
-struct bpf_map_def SEC("maps") vip_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip),
-	.value_size = sizeof(struct vip_meta),
-	.max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ch_rings = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = CH_RINGS_SIZE,
-};
-
-struct bpf_map_def SEC("maps") reals = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct real_definition),
-	.max_entries = MAX_REALS,
-};
-
-struct bpf_map_def SEC("maps") stats = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct vip_stats),
-	.max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ctl_array = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct ctl_value),
-	.max_entries = CTL_MAP_SIZE,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, struct vip);
+	__type(value, struct vip_meta);
+} vip_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CH_RINGS_SIZE);
+	__type(key, __u32);
+	__type(value, __u32);
+} ch_rings SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, MAX_REALS);
+	__type(key, __u32);
+	__type(value, struct real_definition);
+} reals SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, __u32);
+	__type(value, struct vip_stats);
+} stats SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CTL_MAP_SIZE);
+	__type(key, __u32);
+	__type(value, struct ctl_value);
+} ctl_array SEC(".maps");
 
 static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
 					     bool ipv6)
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index ba44a14e6dc4..2e4efe70b1e5 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -165,40 +165,40 @@ struct eth_hdr {
 	unsigned short eth_proto;
 };
 
-struct bpf_map_def SEC("maps") vip_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip),
-	.value_size = sizeof(struct vip_meta),
-	.max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ch_rings = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = CH_RINGS_SIZE,
-};
-
-struct bpf_map_def SEC("maps") reals = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct real_definition),
-	.max_entries = MAX_REALS,
-};
-
-struct bpf_map_def SEC("maps") stats = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct vip_stats),
-	.max_entries = MAX_VIPS,
-};
-
-struct bpf_map_def SEC("maps") ctl_array = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct ctl_value),
-	.max_entries = CTL_MAP_SIZE,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, struct vip);
+	__type(value, struct vip_meta);
+} vip_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CH_RINGS_SIZE);
+	__type(key, __u32);
+	__type(value, __u32);
+} ch_rings SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, MAX_REALS);
+	__type(key, __u32);
+	__type(value, struct real_definition);
+} reals SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, __u32);
+	__type(value, struct vip_stats);
+} stats SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CTL_MAP_SIZE);
+	__type(key, __u32);
+	__type(value, struct ctl_value);
+} ctl_array SEC(".maps");
 
 static __u32 get_packet_hash(struct packet_description *pckt,
 			     bool ipv6)
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index e2f6ed0a583d..a334a0e882e4 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -6,13 +6,6 @@
 #include "bpf_helpers.h"
 #include "bpf_endian.h"
 
-#define bpf_printk(fmt, ...)				\
-({							\
-	char ____fmt[] = fmt;				\
-	bpf_trace_printk(____fmt, sizeof(____fmt),	\
-			##__VA_ARGS__);			\
-})
-
 /* Packet parsing state machine helpers. */
 #define cursor_advance(_cursor, _len) \
 	({ void *_tmp = _cursor; _cursor += _len; _tmp; })
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 2985f262846e..113226115365 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -5,23 +5,23 @@
 #include <linux/types.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") mim_array = {
-	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
-	.key_size = sizeof(int),
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(map_flags, 0);
+	__uint(key_size, sizeof(__u32));
 	/* must be sizeof(__u32) for map in map */
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-	.map_flags = 0,
-};
-
-struct bpf_map_def SEC("maps") mim_hash = {
-	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
-	.key_size = sizeof(int),
+	__uint(value_size, sizeof(__u32));
+} mim_array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(map_flags, 0);
+	__uint(key_size, sizeof(int));
 	/* must be sizeof(__u32) for map in map */
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-	.map_flags = 0,
-};
+	__uint(value_size, sizeof(__u32));
+} mim_hash SEC(".maps");
 
 SEC("xdp_mimtest")
 int xdp_mimtest0(struct xdp_md *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index af8cc68ed2f9..bb7ce35f691b 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -11,28 +11,24 @@ struct hmap_elem {
 	int var[VAR_NUM];
 };
 
-struct bpf_map_def SEC("maps") hash_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct hmap_elem),
-	.max_entries = 1,
-};
-
-BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem);
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct hmap_elem);
+} hash_map SEC(".maps");
 
 struct array_elem {
 	struct bpf_spin_lock lock;
 	int var[VAR_NUM];
 };
 
-struct bpf_map_def SEC("maps") array_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct array_elem),
-	.max_entries = 1,
-};
-
-BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem);
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct array_elem);
+} array_map SEC(".maps");
 
 SEC("map_lock_demo")
 int bpf_map_lock_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index 726340fa6fe0..3d30c02bdae9 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -13,12 +13,12 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def SEC("maps") test_map_id = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} test_map_id SEC(".maps");
 
 SEC("test_obj_id_dummy")
 int test_obj_id(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
new file mode 100644
index 000000000000..876c27deb65a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
+SEC("kprobe/sys_nanosleep")
+int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+{
+	int cpu = bpf_get_smp_processor_id();
+
+	bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
+			      &cpu, sizeof(cpu));
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
new file mode 100644
index 000000000000..1dbe1d4d467e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -0,0 +1,262 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+	({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+				0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+				0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+	unsigned int ver:4;
+	unsigned int priority:8;
+	unsigned int flow_label:20;
+	unsigned short payload_len;
+	unsigned char next_header;
+	unsigned char hop_limit;
+	unsigned long long src_hi;
+	unsigned long long src_lo;
+	unsigned long long dst_hi;
+	unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+	unsigned long long hi;
+	unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+	unsigned char nexthdr;
+	unsigned char hdrlen;
+	unsigned char type;
+	unsigned char segments_left;
+	unsigned char first_segment;
+	unsigned char flags;
+	unsigned short tag;
+
+	struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+	unsigned char type;
+	unsigned char len;
+	unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+	void *cursor, *data_end;
+	struct ip6_srh_t *srh;
+	struct ip6_t *ip;
+	uint8_t *ipver;
+
+	data_end = (void *)(long)skb->data_end;
+	cursor = (void *)(long)skb->data;
+	ipver = (uint8_t *)cursor;
+
+	if ((void *)ipver + sizeof(*ipver) > data_end)
+		return NULL;
+
+	if ((*ipver >> 4) != 6)
+		return NULL;
+
+	ip = cursor_advance(cursor, sizeof(*ip));
+	if ((void *)ip + sizeof(*ip) > data_end)
+		return NULL;
+
+	if (ip->next_header != 43)
+		return NULL;
+
+	srh = cursor_advance(cursor, sizeof(*srh));
+	if ((void *)srh + sizeof(*srh) > data_end)
+		return NULL;
+
+	if (srh->type != 4)
+		return NULL;
+
+	return srh;
+}
+
+static __always_inline int update_tlv_pad(struct __sk_buff *skb,
+					  uint32_t new_pad, uint32_t old_pad,
+					  uint32_t pad_off)
+{
+	int err;
+
+	if (new_pad != old_pad) {
+		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+					  (int) new_pad - (int) old_pad);
+		if (err)
+			return err;
+	}
+
+	if (new_pad > 0) {
+		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0};
+		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+		pad_tlv->type = SR6_TLV_PADDING;
+		pad_tlv->len = new_pad - 2;
+
+		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+					       (void *)pad_tlv_buf, new_pad);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
+						 struct ip6_srh_t *srh,
+						 uint32_t *tlv_off,
+						 uint32_t *pad_size,
+						 uint32_t *pad_off)
+{
+	uint32_t srh_off, cur_off;
+	int offset_valid = 0;
+	int err;
+
+	srh_off = (char *)srh - (char *)(long)skb->data;
+	// cur_off = end of segments, start of possible TLVs
+	cur_off = srh_off + sizeof(*srh) +
+		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+	*pad_off = 0;
+
+	// we can only go as far as ~10 TLVs due to the BPF max stack size
+	#pragma clang loop unroll(disable)
+	for (int i = 0; i < 100; i++) {
+		struct sr6_tlv_t tlv;
+
+		if (cur_off == *tlv_off)
+			offset_valid = 1;
+
+		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+			break;
+
+		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+		if (err)
+			return err;
+
+		if (tlv.type == SR6_TLV_PADDING) {
+			*pad_size = tlv.len + sizeof(tlv);
+			*pad_off = cur_off;
+
+			if (*tlv_off == srh_off) {
+				*tlv_off = cur_off;
+				offset_valid = 1;
+			}
+			break;
+
+		} else if (tlv.type == SR6_TLV_HMAC) {
+			break;
+		}
+
+		cur_off += sizeof(tlv) + tlv.len;
+	} // we reached the padding or HMAC TLVs, or the end of the SRH
+
+	if (*pad_off == 0)
+		*pad_off = cur_off;
+
+	if (*tlv_off == -1)
+		*tlv_off = cur_off;
+	else if (!offset_valid)
+		return -EINVAL;
+
+	return 0;
+}
+
+static __always_inline int add_tlv(struct __sk_buff *skb,
+				   struct ip6_srh_t *srh, uint32_t tlv_off,
+				   struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+	uint8_t len_remaining, new_pad;
+	uint32_t pad_off = 0;
+	uint32_t pad_size = 0;
+	uint32_t partial_srh_len;
+	int err;
+
+	if (tlv_off != -1)
+		tlv_off += srh_off;
+
+	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+		return -EINVAL;
+
+	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+	if (err)
+		return err;
+
+	// the following can't be moved inside update_tlv_pad because the
+	// bpf verifier has some issues with it
+	pad_off += sizeof(*itlv) + itlv->len;
+	partial_srh_len = pad_off - srh_off;
+	len_remaining = partial_srh_len % 8;
+	new_pad = 8 - len_remaining;
+
+	if (new_pad == 1) // cannot pad for 1 byte only
+		new_pad = 9;
+	else if (new_pad == 8)
+		new_pad = 0;
+
+	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+// Add an Egress TLV fc00::4, add the flag A,
+// and apply End.X action to fc42::1
+SEC("lwt_seg6local")
+int __add_egr_x(struct __sk_buff *skb)
+{
+	unsigned long long hi = 0xfc42000000000000;
+	unsigned long long lo = 0x1;
+	struct ip6_srh_t *srh = get_srh(skb);
+	uint8_t new_flags = SR6_FLAG_ALERT;
+	struct ip6_addr_t addr;
+	int err, offset;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+		      (struct sr6_tlv_t *)&tlv, 20);
+	if (err)
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+	err = bpf_lwt_seg6_store_bytes(skb, offset,
+				       (void *)&new_flags, sizeof(new_flags));
+	if (err)
+		return BPF_DROP;
+
+	addr.lo = htonll(lo);
+	addr.hi = htonll(hi);
+	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+				  (void *)&addr, sizeof(addr));
+	if (err)
+		return BPF_DROP;
+	return BPF_REDIRECT;
+}
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 5b54ec637ada..ea7d84f01235 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -21,40 +21,40 @@ int _version SEC("version") = 1;
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
 
-struct bpf_map_def SEC("maps") outer_map = {
-	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") result_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = NR_RESULTS,
-};
-
-struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(int),
-	.max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") linum_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") data_check_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct data_check),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} outer_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, NR_RESULTS);
+	__type(key, __u32);
+	__type(value, __u32);
+} result_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, int);
+} tmp_index_ovr_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} linum_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct data_check);
+} data_check_map SEC(".maps");
 
 #define GOTO_DONE(_result) ({			\
 	result = (_result);			\
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
new file mode 100644
index 000000000000..0e6be01157e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} info_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} status_map SEC(".maps");
+
+SEC("send_signal_demo")
+int bpf_send_signal_test(void *ctx)
+{
+	__u64 *info_val, *status_val;
+	__u32 key = 0, pid, sig;
+	int ret;
+
+	status_val = bpf_map_lookup_elem(&status_map, &key);
+	if (!status_val || *status_val != 0)
+		return 0;
+
+	info_val = bpf_map_lookup_elem(&info_map, &key);
+	if (!info_val || *info_val == 0)
+		return 0;
+
+	sig = *info_val >> 32;
+	pid = *info_val & 0xffffFFFF;
+
+	if ((bpf_get_current_pid_tgid() >> 32) == pid) {
+		ret = bpf_send_signal(sig);
+		if (ret == 0)
+			*status_val = 1;
+	}
+
+	return 0;
+}
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index 1c39e4ccb7f1..a47b003623ef 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -27,58 +27,52 @@ enum bpf_linum_array_idx {
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
-struct bpf_map_def SEC("maps") addr_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct sockaddr_in6),
-	.max_entries = __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-struct bpf_map_def SEC("maps") sock_result_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_sock),
-	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,
-};
-
-struct bpf_map_def SEC("maps") tcp_sock_result_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_tcp_sock),
-	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,
-};
-
-struct bpf_map_def SEC("maps") linum_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = __NR_BPF_LINUM_ARRAY_IDX,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, struct sockaddr_in6);
+} addr_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, struct bpf_sock);
+} sock_result_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, struct bpf_tcp_sock);
+} tcp_sock_result_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, __u32);
+} linum_map SEC(".maps");
 
 struct bpf_spinlock_cnt {
 	struct bpf_spin_lock lock;
 	__u32 cnt;
 };
 
-struct bpf_map_def SEC("maps") sk_pkt_out_cnt = {
-	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct bpf_spinlock_cnt),
-	.max_entries = 0,
-	.map_flags = BPF_F_NO_PREALLOC,
-};
-
-BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt);
-
-struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = {
-	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct bpf_spinlock_cnt),
-	.max_entries = 0,
-	.map_flags = BPF_F_NO_PREALLOC,
-};
-
-BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt);
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct bpf_spinlock_cnt);
+} sk_pkt_out_cnt SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct bpf_spinlock_cnt);
+} sk_pkt_out_cnt10 SEC(".maps");
 
 static bool is_loopback6(__u32 *a6)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 40f904312090..a43b999c8da2 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -10,29 +10,23 @@ struct hmap_elem {
 	int test_padding;
 };
 
-struct bpf_map_def SEC("maps") hmap = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct hmap_elem),
-	.max_entries = 1,
-};
-
-BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem);
-
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct hmap_elem);
+} hmap SEC(".maps");
 
 struct cls_elem {
 	struct bpf_spin_lock lock;
 	volatile int cnt;
 };
 
-struct bpf_map_def SEC("maps") cls_map = {
-	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
-	.key_size = sizeof(struct bpf_cgroup_storage_key),
-	.value_size = sizeof(struct cls_elem),
-};
-
-BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key,
-		     struct cls_elem);
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct cls_elem);
+} cls_map SEC(".maps");
 
 struct bpf_vqueue {
 	struct bpf_spin_lock lock;
@@ -42,14 +36,13 @@ struct bpf_vqueue {
 	unsigned int rate;
 };
 
-struct bpf_map_def SEC("maps") vqueue = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct bpf_vqueue),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct bpf_vqueue);
+} vqueue SEC(".maps");
 
-BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue);
 #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
 
 SEC("spin_lock_demo")
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index d86c281e957f..bbfc8337b6f0 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -8,36 +8,37 @@
 #define PERF_MAX_STACK_DEPTH         127
 #endif
 
-struct bpf_map_def SEC("maps") control_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} control_map SEC(".maps");
 
-struct bpf_map_def SEC("maps") stackid_hmap = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 16384,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, __u32);
+} stackid_hmap SEC(".maps");
 
-struct bpf_map_def SEC("maps") stackmap = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_stack_build_id)
-		* PERF_MAX_STACK_DEPTH,
-	.max_entries = 128,
-	.map_flags = BPF_F_STACK_BUILD_ID,
-};
+typedef struct bpf_stack_build_id stack_trace_t[PERF_MAX_STACK_DEPTH];
 
-struct bpf_map_def SEC("maps") stack_amap = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_stack_build_id)
-		* PERF_MAX_STACK_DEPTH,
-	.max_entries = 128,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 128);
+	__uint(map_flags, BPF_F_STACK_BUILD_ID);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 128);
+	__type(key, __u32);
+	/* there seems to be a bug in kernel not handling typedef properly */
+	struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH];
+} stack_amap SEC(".maps");
 
 /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
 struct random_urandom_args {
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index af111af7ca1a..803c15dc109d 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -8,33 +8,35 @@
 #define PERF_MAX_STACK_DEPTH         127
 #endif
 
-struct bpf_map_def SEC("maps") control_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} control_map SEC(".maps");
 
-struct bpf_map_def SEC("maps") stackid_hmap = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 16384,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, __u32);
+} stackid_hmap SEC(".maps");
 
-struct bpf_map_def SEC("maps") stackmap = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
-	.max_entries = 16384,
-};
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
 
-struct bpf_map_def SEC("maps") stack_amap = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
-	.max_entries = 16384,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16384);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__u64 (*value)[PERF_MAX_STACK_DEPTH];
+} stack_amap SEC(".maps");
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
new file mode 100644
index 000000000000..608a06871572
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
+#define TCP_MEM_LOOPS 28  /* because 30 doesn't fit into 512 bytes of stack */
+#define MAX_ULONG_STR_LEN 7
+#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+
+static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
+	unsigned char i;
+	char name[64];
+	int ret;
+
+	memset(name, 0, sizeof(name));
+	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < sizeof(tcp_mem_name); ++i)
+		if (name[i] != tcp_mem_name[i])
+			return 0;
+
+	return 1;
+}
+
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+	unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
+	char value[MAX_VALUE_STR_LEN];
+	unsigned char i, off = 0;
+	int ret;
+
+	if (ctx->write)
+		return 0;
+
+	if (!is_tcp_mem(ctx))
+		return 0;
+
+	ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+	if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+		ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+				  tcp_mem + i);
+		if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+			return 0;
+		off += ret & MAX_ULONG_STR_LEN;
+	}
+
+	return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
new file mode 100644
index 000000000000..cb201cbe11e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
+#define TCP_MEM_LOOPS 20  /* because 30 doesn't fit into 512 bytes of stack */
+#define MAX_ULONG_STR_LEN 7
+#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+
+static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
+	unsigned char i;
+	char name[64];
+	int ret;
+
+	memset(name, 0, sizeof(name));
+	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < sizeof(tcp_mem_name); ++i)
+		if (name[i] != tcp_mem_name[i])
+			return 0;
+
+	return 1;
+}
+
+
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+	unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
+	char value[MAX_VALUE_STR_LEN];
+	unsigned char i, off = 0;
+	int ret;
+
+	if (ctx->write)
+		return 0;
+
+	if (!is_tcp_mem(ctx))
+		return 0;
+
+	ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+	if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+		ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+				  tcp_mem + i);
+		if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+			return 0;
+		off += ret & MAX_ULONG_STR_LEN;
+	}
+
+	return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index a295cad805d7..5cbbff416998 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -8,7 +8,6 @@
 #include <linux/bpf.h>
 
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 
 /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
 #define MAX_ULONG_STR_LEN 0xF
@@ -16,6 +15,10 @@
 /* Max supported length of sysctl value string (pow2). */
 #define MAX_VALUE_STR_LEN 0x40
 
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
 static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
 {
 	char tcp_mem_name[] = "net/ipv4/tcp_mem";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index bee3bbecc0c4..c8c595da38d4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -148,12 +148,12 @@ struct tcp_estats_basic_event {
 	struct tcp_estats_conn_id conn_id;
 };
 
-struct bpf_map_def SEC("maps") ev_record_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct tcp_estats_basic_event),
-	.max_entries = 1024,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1024);
+	__type(key, __u32);
+	__type(value, struct tcp_estats_basic_event);
+} ev_record_map SEC(".maps");
 
 struct dummy_tracepoint_args {
 	unsigned long long pad;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index c7c3240e0dd4..2e233613d1fc 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -14,19 +14,19 @@
 #include "bpf_endian.h"
 #include "test_tcpbpf.h"
 
-struct bpf_map_def SEC("maps") global_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct tcpbpf_globals),
-	.max_entries = 4,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, __u32);
+	__type(value, struct tcpbpf_globals);
+} global_map SEC(".maps");
 
-struct bpf_map_def SEC("maps") sockopt_results = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(int),
-	.max_entries = 2,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, int);
+} sockopt_results SEC(".maps");
 
 static inline void update_event_map(int event)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index ec6db6e64c41..08346e7765d5 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -14,19 +14,19 @@
 #include "bpf_endian.h"
 #include "test_tcpnotify.h"
 
-struct bpf_map_def SEC("maps") global_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct tcpnotify_globals),
-	.max_entries = 4,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, __u32);
+	__type(value, struct tcpnotify_globals);
+} global_map SEC(".maps");
 
-struct bpf_map_def SEC("maps") perf_event_map = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(__u32),
-	.max_entries = 2,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 2);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(__u32));
+} perf_event_map SEC(".maps");
 
 int _version SEC("version") = 1;
 
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index 77830693eccb..9897150ed516 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -2,7 +2,7 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
-#define ATTR __attribute__((always_inline))
+#define ATTR __always_inline
 #include "test_jhash.h"
 
 SEC("scale90_inline")
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 5e7df8bb5b5d..0941c655b07b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -22,19 +22,19 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def SEC("maps") rxcnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = 256,
-};
-
-struct bpf_map_def SEC("maps") vip2tnl = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip),
-	.value_size = sizeof(struct iptnl_info),
-	.max_entries = MAX_IPTNL_ENTRIES,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 256);
+	__type(key, __u32);
+	__type(value, __u64);
+} rxcnt SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_IPTNL_ENTRIES);
+	__type(key, struct vip);
+	__type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
 
 static __always_inline void count_tx(__u32 protocol)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
new file mode 100644
index 000000000000..97175f73c3fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_iptunnel_common.h"
+
+int _version SEC("version") = 1;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 256);
+	__type(key, __u32);
+	__type(value, __u64);
+} rxcnt SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_IPTNL_ENTRIES);
+	__type(key, struct vip);
+	__type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
+
+static __always_inline void count_tx(__u32 protocol)
+{
+	__u64 *rxcnt_count;
+
+	rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+	if (rxcnt_count)
+		*rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+				     __u8 protocol)
+{
+	struct tcphdr *th;
+	struct udphdr *uh;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		th = (struct tcphdr *)trans_data;
+		if (th + 1 > data_end)
+			return -1;
+		return th->dest;
+	case IPPROTO_UDP:
+		uh = (struct udphdr *)trans_data;
+		if (uh + 1 > data_end)
+			return -1;
+		return uh->dest;
+	default:
+		return 0;
+	}
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+				       const struct ethhdr *old_eth,
+				       const struct iptnl_info *tnl,
+				       __be16 h_proto)
+{
+	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+	new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct iphdr *iph = data + sizeof(struct ethhdr);
+	__u16 *next_iph;
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+	__u32 csum = 0;
+	int i;
+
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(iph + 1, data_end, iph->protocol);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = iph->protocol;
+	vip.family = AF_INET;
+	vip.daddr.v4 = iph->daddr;
+	vip.dport = dport;
+	payload_len = bpf_ntohs(iph->tot_len);
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v4-in-v4 */
+	if (!tnl || tnl->family != AF_INET)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	iph = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*iph);
+
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end ||
+	    iph + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+	iph->version = 4;
+	iph->ihl = sizeof(*iph) >> 2;
+	iph->frag_off =	0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 0;
+	iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
+	iph->daddr = tnl->daddr.v4;
+	iph->saddr = tnl->saddr.v4;
+	iph->ttl = 8;
+
+	next_iph = (__u16 *)iph;
+#pragma clang loop unroll(disable)
+	for (i = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *next_iph++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = ip6h->nexthdr;
+	vip.family = AF_INET6;
+	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+	vip.dport = dport;
+	payload_len = ip6h->payload_len;
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v6-in-v6 */
+	if (!tnl || tnl->family != AF_INET6)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	ip6h = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*ip6h);
+
+	if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+	    ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+	ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip6h->hop_limit = 8;
+	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == bpf_htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+		return handle_ipv6(xdp);
+	else
+		return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 5e4aac74f9d0..dad8a7e33eaa 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -15,13 +15,6 @@
 #include <linux/udp.h>
 #include "bpf_helpers.h"
 
-#define bpf_printk(fmt, ...)				\
-({							\
-	char ____fmt[] = fmt;				\
-	bpf_trace_printk(____fmt, sizeof(____fmt),	\
-			##__VA_ARGS__);			\
-})
-
 static __u32 rol32(__u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
@@ -170,53 +163,48 @@ struct lb_stats {
 	__u64 v1;
 };
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip_definition),
-	.value_size = sizeof(struct vip_meta),
-	.max_entries = 512,
-	.map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
-	.type = BPF_MAP_TYPE_LRU_HASH,
-	.key_size = sizeof(struct flow_key),
-	.value_size = sizeof(struct real_pos_lru),
-	.max_entries = 300,
-	.map_flags = 1U << 1,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 12 * 655,
-	.map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct real_definition),
-	.max_entries = 40,
-	.map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct lb_stats),
-	.max_entries = 515,
-	.map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct ctl_value),
-	.max_entries = 16,
-	.map_flags = 0,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 512);
+	__type(key, struct vip_definition);
+	__type(value, struct vip_meta);
+} vip_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(max_entries, 300);
+	__uint(map_flags, 1U << 1);
+	__type(key, struct flow_key);
+	__type(value, struct real_pos_lru);
+} lru_cache SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 12 * 655);
+	__type(key, __u32);
+	__type(value, __u32);
+} ch_rings SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 40);
+	__type(key, __u32);
+	__type(value, struct real_definition);
+} reals SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 515);
+	__type(key, __u32);
+	__type(value, struct lb_stats);
+} stats SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 16);
+	__type(key, __u32);
+	__type(value, struct ctl_value);
+} ctl_array SEC(".maps");
 
 struct eth_hdr {
 	unsigned char eth_dest[6];
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
new file mode 100644
index 000000000000..1c5f298d7196
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(max_entries, 8);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} tx_port SEC(".maps");
+
+SEC("redirect_map_0")
+int xdp_redirect_map_0(struct xdp_md *xdp)
+{
+	return bpf_redirect_map(&tx_port, 0, 0);
+}
+
+SEC("redirect_map_1")
+int xdp_redirect_map_1(struct xdp_md *xdp)
+{
+	return bpf_redirect_map(&tx_port, 1, 0);
+}
+
+SEC("redirect_map_2")
+int xdp_redirect_map_2(struct xdp_md *xdp)
+{
+	return bpf_redirect_map(&tx_port, 2, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
new file mode 100644
index 000000000000..57912e7c94b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("tx")
+int xdp_tx(struct xdp_md *xdp)
+{
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
new file mode 100644
index 000000000000..112a2857f4e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
+
+#define KBUILD_MODNAME "foo"
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#include "xdping.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 256);
+	__type(key, __u32);
+	__type(value, struct pinginfo);
+} ping_map SEC(".maps");
+
+static __always_inline void swap_src_dst_mac(void *data)
+{
+	unsigned short *p = data;
+	unsigned short dst[3];
+
+	dst[0] = p[0];
+	dst[1] = p[1];
+	dst[2] = p[2];
+	p[0] = p[3];
+	p[1] = p[4];
+	p[2] = p[5];
+	p[3] = dst[0];
+	p[4] = dst[1];
+	p[5] = dst[2];
+}
+
+static __always_inline __u16 csum_fold_helper(__wsum sum)
+{
+	sum = (sum & 0xffff) + (sum >> 16);
+	return ~((sum & 0xffff) + (sum >> 16));
+}
+
+static __always_inline __u16 ipv4_csum(void *data_start, int data_size)
+{
+	__wsum sum;
+
+	sum = bpf_csum_diff(0, 0, data_start, data_size, 0);
+	return csum_fold_helper(sum);
+}
+
+#define ICMP_ECHO_LEN		64
+
+static __always_inline int icmp_check(struct xdp_md *ctx, int type)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	struct icmphdr *icmph;
+	struct iphdr *iph;
+
+	if (data + sizeof(*eth) + sizeof(*iph) + ICMP_ECHO_LEN > data_end)
+		return XDP_PASS;
+
+	if (eth->h_proto != bpf_htons(ETH_P_IP))
+		return XDP_PASS;
+
+	iph = data + sizeof(*eth);
+
+	if (iph->protocol != IPPROTO_ICMP)
+		return XDP_PASS;
+
+	if (bpf_ntohs(iph->tot_len) - sizeof(*iph) != ICMP_ECHO_LEN)
+		return XDP_PASS;
+
+	icmph = data + sizeof(*eth) + sizeof(*iph);
+
+	if (icmph->type != type)
+		return XDP_PASS;
+
+	return XDP_TX;
+}
+
+SEC("xdpclient")
+int xdping_client(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct pinginfo *pinginfo = NULL;
+	struct ethhdr *eth = data;
+	struct icmphdr *icmph;
+	struct iphdr *iph;
+	__u64 recvtime;
+	__be32 raddr;
+	__be16 seq;
+	int ret;
+	__u8 i;
+
+	ret = icmp_check(ctx, ICMP_ECHOREPLY);
+
+	if (ret != XDP_TX)
+		return ret;
+
+	iph = data + sizeof(*eth);
+	icmph = data + sizeof(*eth) + sizeof(*iph);
+	raddr = iph->saddr;
+
+	/* Record time reply received. */
+	recvtime = bpf_ktime_get_ns();
+	pinginfo = bpf_map_lookup_elem(&ping_map, &raddr);
+	if (!pinginfo || pinginfo->seq != icmph->un.echo.sequence)
+		return XDP_PASS;
+
+	if (pinginfo->start) {
+#pragma clang loop unroll(full)
+		for (i = 0; i < XDPING_MAX_COUNT; i++) {
+			if (pinginfo->times[i] == 0)
+				break;
+		}
+		/* verifier is fussy here... */
+		if (i < XDPING_MAX_COUNT) {
+			pinginfo->times[i] = recvtime -
+					     pinginfo->start;
+			pinginfo->start = 0;
+			i++;
+		}
+		/* No more space for values? */
+		if (i == pinginfo->count || i == XDPING_MAX_COUNT)
+			return XDP_PASS;
+	}
+
+	/* Now convert reply back into echo request. */
+	swap_src_dst_mac(data);
+	iph->saddr = iph->daddr;
+	iph->daddr = raddr;
+	icmph->type = ICMP_ECHO;
+	seq = bpf_htons(bpf_ntohs(icmph->un.echo.sequence) + 1);
+	icmph->un.echo.sequence = seq;
+	icmph->checksum = 0;
+	icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN);
+
+	pinginfo->seq = seq;
+	pinginfo->start = bpf_ktime_get_ns();
+
+	return XDP_TX;
+}
+
+SEC("xdpserver")
+int xdping_server(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	struct icmphdr *icmph;
+	struct iphdr *iph;
+	__be32 raddr;
+	int ret;
+
+	ret = icmp_check(ctx, ICMP_ECHO);
+
+	if (ret != XDP_TX)
+		return ret;
+
+	iph = data + sizeof(*eth);
+	icmph = data + sizeof(*eth) + sizeof(*iph);
+	raddr = iph->saddr;
+
+	/* Now convert request into echo reply. */
+	swap_src_dst_mac(data);
+	iph->saddr = iph->daddr;
+	iph->daddr = raddr;
+	icmph->type = ICMP_ECHOREPLY;
+	icmph->checksum = 0;
+	icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN);
+
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 3c789d03b629..0262f7b374f9 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -180,7 +180,7 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
+			{7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
 			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
 			{8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
 			{9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
@@ -315,7 +315,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Offset is added to packet pointer R5, resulting in
 			 * known fixed offset, and variable offset from R6.
@@ -405,7 +405,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
@@ -473,12 +473,12 @@ static struct bpf_align_test tests[] = {
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 			 * the add could overflow.
 			 */
-			{7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+			{7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
 			/* Checked s>=0 */
 			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 			/* packet pointer + nonnegative (4n+2) */
 			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-			{13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -486,7 +486,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 		}
 	},
 	{
@@ -521,7 +521,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
@@ -574,7 +574,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 42c1ce988945..8351cb5f4a20 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4016,71 +4016,18 @@ struct btf_file_test {
 };
 
 static struct btf_file_test file_tests[] = {
-{
-	.file = "test_btf_haskv.o",
-},
-{
-	.file = "test_btf_nokv.o",
-	.btf_kv_notfound = true,
-},
+	{ .file = "test_btf_haskv.o", },
+	{ .file = "test_btf_newkv.o", },
+	{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
 };
 
-static int file_has_btf_elf(const char *fn, bool *has_btf_ext)
-{
-	Elf_Scn *scn = NULL;
-	GElf_Ehdr ehdr;
-	int ret = 0;
-	int elf_fd;
-	Elf *elf;
-
-	if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
-		  "elf_version(EV_CURRENT) == EV_NONE"))
-		return -1;
-
-	elf_fd = open(fn, O_RDONLY);
-	if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
-		return -1;
-
-	elf = elf_begin(elf_fd, ELF_C_READ, NULL);
-	if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
-		ret = -1;
-		goto done;
-	}
-
-	if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
-		ret = -1;
-		goto done;
-	}
-
-	while ((scn = elf_nextscn(elf, scn))) {
-		const char *sh_name;
-		GElf_Shdr sh;
-
-		if (CHECK(gelf_getshdr(scn, &sh) != &sh,
-			  "file:%s gelf_getshdr != &sh", fn)) {
-			ret = -1;
-			goto done;
-		}
-
-		sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
-		if (!strcmp(sh_name, BTF_ELF_SEC))
-			ret = 1;
-		if (!strcmp(sh_name, BTF_EXT_ELF_SEC))
-			*has_btf_ext = true;
-	}
-
-done:
-	close(elf_fd);
-	elf_end(elf);
-	return ret;
-}
-
 static int do_test_file(unsigned int test_num)
 {
 	const struct btf_file_test *test = &file_tests[test_num - 1];
 	const char *expected_fnames[] = {"_dummy_tracepoint",
 					 "test_long_fname_1",
 					 "test_long_fname_2"};
+	struct btf_ext *btf_ext = NULL;
 	struct bpf_prog_info info = {};
 	struct bpf_object *obj = NULL;
 	struct bpf_func_info *finfo;
@@ -4095,15 +4042,19 @@ static int do_test_file(unsigned int test_num)
 	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
 		test->file);
 
-	err = file_has_btf_elf(test->file, &has_btf_ext);
-	if (err == -1)
-		return err;
-
-	if (err == 0) {
-		fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
-		skip_cnt++;
-		return 0;
+	btf = btf__parse_elf(test->file, &btf_ext);
+	if (IS_ERR(btf)) {
+		if (PTR_ERR(btf) == -ENOENT) {
+			fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+			skip_cnt++;
+			return 0;
+		}
+		return PTR_ERR(btf);
 	}
+	btf__free(btf);
+
+	has_btf_ext = btf_ext != NULL;
+	btf_ext__free(btf_ext);
 
 	obj = bpf_object__open(test->file);
 	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c
new file mode 100644
index 000000000000..8f850823d35f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_dump.c
@@ -0,0 +1,143 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <btf.h>
+
+#define CHECK(condition, format...) ({					\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
+		fprintf(stderr, format);				\
+	}								\
+	__ret;								\
+})
+
+void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+	vfprintf(ctx, fmt, args);
+}
+
+struct btf_dump_test_case {
+	const char *name;
+	struct btf_dump_opts opts;
+} btf_dump_test_cases[] = {
+	{.name = "btf_dump_test_case_syntax", .opts = {}},
+	{.name = "btf_dump_test_case_ordering", .opts = {}},
+	{.name = "btf_dump_test_case_padding", .opts = {}},
+	{.name = "btf_dump_test_case_packing", .opts = {}},
+	{.name = "btf_dump_test_case_bitfields", .opts = {}},
+	{.name = "btf_dump_test_case_multidim", .opts = {}},
+	{.name = "btf_dump_test_case_namespacing", .opts = {}},
+};
+
+static int btf_dump_all_types(const struct btf *btf,
+			      const struct btf_dump_opts *opts)
+{
+	size_t type_cnt = btf__get_nr_types(btf);
+	struct btf_dump *d;
+	int err = 0, id;
+
+	d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+
+	for (id = 1; id <= type_cnt; id++) {
+		err = btf_dump__dump_type(d, id);
+		if (err)
+			goto done;
+	}
+
+done:
+	btf_dump__free(d);
+	return err;
+}
+
+int test_btf_dump_case(int n, struct btf_dump_test_case *test_case)
+{
+	char test_file[256], out_file[256], diff_cmd[1024];
+	struct btf *btf = NULL;
+	int err = 0, fd = -1;
+	FILE *f = NULL;
+
+	fprintf(stderr, "Test case #%d (%s): ", n, test_case->name);
+
+	snprintf(test_file, sizeof(test_file), "%s.o", test_case->name);
+
+	btf = btf__parse_elf(test_file, NULL);
+	if (CHECK(IS_ERR(btf),
+	    "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+		err = -PTR_ERR(btf);
+		btf = NULL;
+		goto done;
+	}
+
+	snprintf(out_file, sizeof(out_file),
+		 "/tmp/%s.output.XXXXXX", test_case->name);
+	fd = mkstemp(out_file);
+	if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) {
+		err = fd;
+		goto done;
+	}
+	f = fdopen(fd, "w");
+	if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n",
+		  strerror(errno), errno)) {
+		close(fd);
+		goto done;
+	}
+
+	test_case->opts.ctx = f;
+	err = btf_dump_all_types(btf, &test_case->opts);
+	fclose(f);
+	close(fd);
+	if (CHECK(err, "failure during C dumping: %d\n", err)) {
+		goto done;
+	}
+
+	snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name);
+	/*
+	 * Diff test output and expected test output, contained between
+	 * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case.
+	 * For expected output lines, everything before '*' is stripped out.
+	 * Also lines containing comment start and comment end markers are
+	 * ignored. 
+	 */
+	snprintf(diff_cmd, sizeof(diff_cmd),
+		 "awk '/START-EXPECTED-OUTPUT/{out=1;next} "
+		 "/END-EXPECTED-OUTPUT/{out=0} "
+		 "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */
+		 "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'",
+		 test_file, out_file);
+	err = system(diff_cmd);
+	if (CHECK(err,
+		  "differing test output, output=%s, err=%d, diff cmd:\n%s\n",
+		  out_file, err, diff_cmd))
+		goto done;
+
+	remove(out_file);
+	fprintf(stderr, "OK\n");
+
+done:
+	btf__free(btf);
+	return err;
+}
+
+int main() {
+	int test_case_cnt, i, err, failed = 0;
+
+	test_case_cnt = sizeof(btf_dump_test_cases) /
+			sizeof(btf_dump_test_cases[0]);
+
+	for (i = 0; i < test_case_cnt; i++) {
+		err = test_btf_dump_case(i, &btf_dump_test_cases[i]);
+		if (err)
+			failed++;
+	}
+
+	fprintf(stderr, "%d tests succeeded, %d tests failed.\n",
+		test_case_cnt - failed, failed);
+
+	return failed;
+}
diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
new file mode 100644
index 000000000000..7671909ee1cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ *   The eBPF program accesses the map passed in to store two pieces of
+ *   information. The number of invocations of the program, which maps
+ *   to the number of packets received, is stored to key 0. Key 1 is
+ *   incremented on each iteration by the number of bytes stored in
+ *   the skb. The program also stores the number of received bytes
+ *   in the cgroup storage.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ *   packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define FOO		"/foo"
+#define BAR		"/foo/bar/"
+#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#ifdef DEBUG
+#define debug(args...) printf(args)
+#else
+#define debug(args...)
+#endif
+
+static int prog_load(int verdict)
+{
+	int ret;
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+		BPF_EXIT_INSN(),
+	};
+	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+			       prog, insns_cnt, "GPL", 0,
+			       bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+	if (ret < 0) {
+		log_err("Loading program");
+		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
+		return 0;
+	}
+	return ret;
+}
+
+static int test_foo_bar(void)
+{
+	int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
+
+	allow_prog = prog_load(1);
+	if (!allow_prog)
+		goto err;
+
+	drop_prog = prog_load(0);
+	if (!drop_prog)
+		goto err;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	/* Create cgroup /foo, get fd, and join it */
+	foo = create_and_get_cgroup(FOO);
+	if (foo < 0)
+		goto err;
+
+	if (join_cgroup(FOO))
+		goto err;
+
+	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo");
+		goto err;
+	}
+
+	debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
+	assert(system(PING_CMD) != 0);
+
+	/* Create cgroup /foo/bar, get fd, and join it */
+	bar = create_and_get_cgroup(BAR);
+	if (bar < 0)
+		goto err;
+
+	if (join_cgroup(BAR))
+		goto err;
+
+	debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
+	assert(system(PING_CMD) != 0);
+
+	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo/bar");
+		goto err;
+	}
+
+	debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
+	assert(system(PING_CMD) == 0);
+
+	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching program from /foo/bar");
+		goto err;
+	}
+
+	debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
+	       "This ping in cgroup /foo/bar should fail...\n");
+	assert(system(PING_CMD) != 0);
+
+	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo/bar");
+		goto err;
+	}
+
+	if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching program from /foo");
+		goto err;
+	}
+
+	debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
+	       "This ping in cgroup /foo/bar should pass...\n");
+	assert(system(PING_CMD) == 0);
+
+	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
+		errno = 0;
+		log_err("Unexpected success attaching prog to /foo/bar");
+		goto err;
+	}
+
+	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching program from /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
+		errno = 0;
+		log_err("Unexpected success in double detach from /foo");
+		goto err;
+	}
+
+	if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
+		log_err("Attaching non-overridable prog to /foo");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
+		errno = 0;
+		log_err("Unexpected success attaching non-overridable prog to /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_ALLOW_OVERRIDE)) {
+		errno = 0;
+		log_err("Unexpected success attaching overridable prog to /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_ALLOW_OVERRIDE)) {
+		errno = 0;
+		log_err("Unexpected success attaching overridable prog to /foo");
+		goto err;
+	}
+
+	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
+		log_err("Attaching different non-overridable prog to /foo");
+		goto err;
+	}
+
+	goto out;
+
+err:
+	rc = 1;
+
+out:
+	close(foo);
+	close(bar);
+	cleanup_cgroup_environment();
+	if (!rc)
+		printf("#override:PASS\n");
+	else
+		printf("#override:FAIL\n");
+	return rc;
+}
+
+static int map_fd = -1;
+
+static int prog_load_cnt(int verdict, int val)
+{
+	int cgroup_storage_fd, percpu_cgroup_storage_fd;
+
+	if (map_fd < 0)
+		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+	if (map_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		return -1;
+	}
+
+	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+	if (cgroup_storage_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		return -1;
+	}
+
+	percpu_cgroup_storage_fd = bpf_create_map(
+		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+	if (percpu_cgroup_storage_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		return -1;
+	}
+
+	struct bpf_insn prog[] = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
+		BPF_MOV64_IMM(BPF_REG_2, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+		BPF_MOV64_IMM(BPF_REG_1, val),
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
+
+		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
+		BPF_MOV64_IMM(BPF_REG_2, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
+		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+
+		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+		BPF_EXIT_INSN(),
+	};
+	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	int ret;
+
+	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+			       prog, insns_cnt, "GPL", 0,
+			       bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+	if (ret < 0) {
+		log_err("Loading program");
+		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
+		return 0;
+	}
+	close(cgroup_storage_fd);
+	return ret;
+}
+
+
+static int test_multiprog(void)
+{
+	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
+	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
+	int drop_prog, allow_prog[6] = {}, rc = 0;
+	unsigned long long value;
+	int i = 0;
+
+	for (i = 0; i < 6; i++) {
+		allow_prog[i] = prog_load_cnt(1, 1 << i);
+		if (!allow_prog[i])
+			goto err;
+	}
+	drop_prog = prog_load_cnt(0, 1);
+	if (!drop_prog)
+		goto err;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cg1 = create_and_get_cgroup("/cg1");
+	if (cg1 < 0)
+		goto err;
+	cg2 = create_and_get_cgroup("/cg1/cg2");
+	if (cg2 < 0)
+		goto err;
+	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
+	if (cg3 < 0)
+		goto err;
+	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
+	if (cg4 < 0)
+		goto err;
+	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
+	if (cg5 < 0)
+		goto err;
+
+	if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
+		goto err;
+
+	if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_MULTI)) {
+		log_err("Attaching prog to cg1");
+		goto err;
+	}
+	if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_ALLOW_MULTI)) {
+		log_err("Unexpected success attaching the same prog to cg1");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_MULTI)) {
+		log_err("Attaching prog2 to cg1");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to cg2");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_MULTI)) {
+		log_err("Attaching prog to cg3");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to cg4");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
+		log_err("Attaching prog to cg5");
+		goto err;
+	}
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 8 + 32);
+
+	/* query the number of effective progs in cg5 */
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      NULL, NULL, &prog_cnt) == 0);
+	assert(prog_cnt == 4);
+	/* retrieve prog_ids of effective progs in cg5 */
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      &attach_flags, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 4);
+	assert(attach_flags == 0);
+	saved_prog_id = prog_ids[0];
+	/* check enospc handling */
+	prog_ids[0] = 0;
+	prog_cnt = 2;
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      &attach_flags, prog_ids, &prog_cnt) == -1 &&
+	       errno == ENOSPC);
+	assert(prog_cnt == 4);
+	/* check that prog_ids are returned even when buffer is too small */
+	assert(prog_ids[0] == saved_prog_id);
+	/* retrieve prog_id of single attached prog in cg5 */
+	prog_ids[0] = 0;
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
+			      NULL, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 1);
+	assert(prog_ids[0] == saved_prog_id);
+
+	/* detach bottom program and ping again */
+	if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching prog from cg5");
+		goto err;
+	}
+	value = 0;
+	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 8 + 16);
+
+	/* detach 3rd from bottom program and ping again */
+	errno = 0;
+	if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Unexpected success on detach from cg3");
+		goto err;
+	}
+	if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching from cg3");
+		goto err;
+	}
+	value = 0;
+	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 16);
+
+	/* detach 2nd from bottom program and ping again */
+	if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching prog from cg4");
+		goto err;
+	}
+	value = 0;
+	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 4);
+
+	prog_cnt = 4;
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      &attach_flags, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 3);
+	assert(attach_flags == 0);
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
+			      NULL, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 0);
+	goto out;
+err:
+	rc = 1;
+
+out:
+	for (i = 0; i < 6; i++)
+		if (allow_prog[i] > 0)
+			close(allow_prog[i]);
+	close(cg1);
+	close(cg2);
+	close(cg3);
+	close(cg4);
+	close(cg5);
+	cleanup_cgroup_environment();
+	if (!rc)
+		printf("#multi:PASS\n");
+	else
+		printf("#multi:FAIL\n");
+	return rc;
+}
+
+static int test_autodetach(void)
+{
+	__u32 prog_cnt = 4, attach_flags;
+	int allow_prog[2] = {0};
+	__u32 prog_ids[2] = {0};
+	int cg = 0, i, rc = -1;
+	void *ptr = NULL;
+	int attempts;
+
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+		allow_prog[i] = prog_load_cnt(1, 1 << i);
+		if (!allow_prog[i])
+			goto err;
+	}
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	/* create a cgroup, attach two programs and remember their ids */
+	cg = create_and_get_cgroup("/cg_autodetach");
+	if (cg < 0)
+		goto err;
+
+	if (join_cgroup("/cg_autodetach"))
+		goto err;
+
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+		if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
+				    BPF_F_ALLOW_MULTI)) {
+			log_err("Attaching prog[%d] to cg:egress", i);
+			goto err;
+		}
+	}
+
+	/* make sure that programs are attached and run some traffic */
+	assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
+			      prog_ids, &prog_cnt) == 0);
+	assert(system(PING_CMD) == 0);
+
+	/* allocate some memory (4Mb) to pin the original cgroup */
+	ptr = malloc(4 * (1 << 20));
+	if (!ptr)
+		goto err;
+
+	/* close programs and cgroup fd */
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+		close(allow_prog[i]);
+		allow_prog[i] = 0;
+	}
+
+	close(cg);
+	cg = 0;
+
+	/* leave the cgroup and remove it. don't detach programs */
+	cleanup_cgroup_environment();
+
+	/* wait for the asynchronous auto-detachment.
+	 * wait for no more than 5 sec and give up.
+	 */
+	for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
+		for (attempts = 5; attempts >= 0; attempts--) {
+			int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
+
+			if (fd < 0)
+				break;
+
+			/* don't leave the fd open */
+			close(fd);
+
+			if (!attempts)
+				goto err;
+
+			sleep(1);
+		}
+	}
+
+	rc = 0;
+err:
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+		if (allow_prog[i] > 0)
+			close(allow_prog[i]);
+	if (cg)
+		close(cg);
+	free(ptr);
+	cleanup_cgroup_environment();
+	if (!rc)
+		printf("#autodetach:PASS\n");
+	else
+		printf("#autodetach:FAIL\n");
+	return rc;
+}
+
+int main(void)
+{
+	int (*tests[])(void) = {
+		test_foo_bar,
+		test_multiprog,
+		test_autodetach,
+	};
+	int errors = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++)
+		if (tests[i]())
+			errors++;
+
+	if (errors)
+		printf("test_cgroup_attach:FAIL\n");
+	else
+		printf("test_cgroup_attach:PASS\n");
+
+	return errors ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c
new file mode 100644
index 000000000000..b64094c981e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_hashmap.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Tests for libbpf's hashmap.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+#define CHECK(condition, format...) ({					\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
+		fprintf(stderr, format);				\
+	}								\
+	__ret;								\
+})
+
+size_t hash_fn(const void *k, void *ctx)
+{
+	return (long)k;
+}
+
+bool equal_fn(const void *a, const void *b, void *ctx)
+{
+	return (long)a == (long)b;
+}
+
+static inline size_t next_pow_2(size_t n)
+{
+	size_t r = 1;
+
+	while (r < n)
+		r <<= 1;
+	return r;
+}
+
+static inline size_t exp_cap(size_t sz)
+{
+	size_t r = next_pow_2(sz);
+
+	if (sz * 4 / 3 > r)
+		r <<= 1;
+	return r;
+}
+
+#define ELEM_CNT 62
+
+int test_hashmap_generic(void)
+{
+	struct hashmap_entry *entry, *tmp;
+	int err, bkt, found_cnt, i;
+	long long found_msk;
+	struct hashmap *map;
+
+	fprintf(stderr, "%s: ", __func__);
+
+	map = hashmap__new(hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
+		return 1;
+
+	for (i = 0; i < ELEM_CNT; i++) {
+		const void *oldk, *k = (const void *)(long)i;
+		void *oldv, *v = (void *)(long)(1024 + i);
+
+		err = hashmap__update(map, k, v, &oldk, &oldv);
+		if (CHECK(err != -ENOENT, "unexpected result: %d\n", err))
+			return 1;
+
+		if (i % 2) {
+			err = hashmap__add(map, k, v);
+		} else {
+			err = hashmap__set(map, k, v, &oldk, &oldv);
+			if (CHECK(oldk != NULL || oldv != NULL,
+				  "unexpected k/v: %p=%p\n", oldk, oldv))
+				return 1;
+		}
+
+		if (CHECK(err, "failed to add k/v %ld = %ld: %d\n",
+			       (long)k, (long)v, err))
+			return 1;
+
+		if (CHECK(!hashmap__find(map, k, &oldv),
+			  "failed to find key %ld\n", (long)k))
+			return 1;
+		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
+			return 1;
+	}
+
+	if (CHECK(hashmap__size(map) != ELEM_CNT,
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+
+	found_msk = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		long k = (long)entry->key;
+		long v = (long)entry->value;
+
+		found_msk |= 1ULL << k;
+		if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v))
+			return 1;
+	}
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+		  "not all keys iterated: %llx\n", found_msk))
+		return 1;
+
+	for (i = 0; i < ELEM_CNT; i++) {
+		const void *oldk, *k = (const void *)(long)i;
+		void *oldv, *v = (void *)(long)(256 + i);
+
+		err = hashmap__add(map, k, v);
+		if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err))
+			return 1;
+
+		if (i % 2)
+			err = hashmap__update(map, k, v, &oldk, &oldv);
+		else
+			err = hashmap__set(map, k, v, &oldk, &oldv);
+
+		if (CHECK(err, "failed to update k/v %ld = %ld: %d\n",
+			       (long)k, (long)v, err))
+			return 1;
+		if (CHECK(!hashmap__find(map, k, &oldv),
+			  "failed to find key %ld\n", (long)k))
+			return 1;
+		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
+			return 1;
+	}
+
+	if (CHECK(hashmap__size(map) != ELEM_CNT,
+		  "invalid updated map size: %zu\n", hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+
+	found_msk = 0;
+	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
+		long k = (long)entry->key;
+		long v = (long)entry->value;
+
+		found_msk |= 1ULL << k;
+		if (CHECK(v - k != 256,
+			  "invalid updated k/v pair: %ld = %ld\n", k, v))
+			return 1;
+	}
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+		  "not all keys iterated after update: %llx\n", found_msk))
+		return 1;
+
+	found_cnt = 0;
+	hashmap__for_each_key_entry(map, entry, (void *)0) {
+		found_cnt++;
+	}
+	if (CHECK(!found_cnt, "didn't find any entries for key 0\n"))
+		return 1;
+
+	found_msk = 0;
+	found_cnt = 0;
+	hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
+		const void *oldk, *k;
+		void *oldv, *v;
+
+		k = entry->key;
+		v = entry->value;
+
+		found_cnt++;
+		found_msk |= 1ULL << (long)k;
+
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+			  "failed to delete k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			return 1;
+		if (CHECK(oldk != k || oldv != v,
+			  "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
+			  (long)k, (long)v, (long)oldk, (long)oldv))
+			return 1;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+			  "unexpectedly deleted k/v %ld = %ld\n",
+			  (long)oldk, (long)oldv))
+			return 1;
+	}
+
+	if (CHECK(!found_cnt || !found_msk,
+		  "didn't delete any key entries\n"))
+		return 1;
+	if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt,
+		  "invalid updated map size (already deleted: %d): %zu\n",
+		  found_cnt, hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+
+	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
+		const void *oldk, *k;
+		void *oldv, *v;
+
+		k = entry->key;
+		v = entry->value;
+
+		found_cnt++;
+		found_msk |= 1ULL << (long)k;
+
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+			  "failed to delete k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			return 1;
+		if (CHECK(oldk != k || oldv != v,
+			  "invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
+			  (long)k, (long)v, (long)oldk, (long)oldv))
+			return 1;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+			  "unexpectedly deleted k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			return 1;
+	}
+
+	if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
+		  "not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
+		  found_cnt, found_msk))
+		return 1;
+	if (CHECK(hashmap__size(map) != 0,
+		  "invalid updated map size (already deleted: %d): %zu\n",
+		  found_cnt, hashmap__size(map)))
+		return 1;
+
+	found_cnt = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "unexpected map entries left: %ld = %ld\n",
+			     (long)entry->key, (long)entry->value);
+		return 1;
+	}
+
+	hashmap__free(map);
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "unexpected map entries left: %ld = %ld\n",
+			     (long)entry->key, (long)entry->value);
+		return 1;
+	}
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
+
+size_t collision_hash_fn(const void *k, void *ctx)
+{
+	return 0;
+}
+
+int test_hashmap_multimap(void)
+{
+	void *k1 = (void *)0, *k2 = (void *)1;
+	struct hashmap_entry *entry;
+	struct hashmap *map;
+	long found_msk;
+	int err, bkt;
+
+	fprintf(stderr, "%s: ", __func__);
+
+	/* force collisions */
+	map = hashmap__new(collision_hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
+		return 1;
+
+
+	/* set up multimap:
+	 * [0] -> 1, 2, 4;
+	 * [1] -> 8, 16, 32;
+	 */
+	err = hashmap__append(map, k1, (void *)1);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k1, (void *)2);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k1, (void *)4);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+
+	err = hashmap__append(map, k2, (void *)8);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k2, (void *)16);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k2, (void *)32);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+
+	if (CHECK(hashmap__size(map) != 6,
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		return 1;
+
+	/* verify global iteration still works and sees all values */
+	found_msk = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (1 << 6) - 1,
+		  "not all keys iterated: %lx\n", found_msk))
+		return 1;
+
+	/* iterate values for key 1 */
+	found_msk = 0;
+	hashmap__for_each_key_entry(map, entry, k1) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (1 | 2 | 4),
+		  "invalid k1 values: %lx\n", found_msk))
+		return 1;
+
+	/* iterate values for key 2 */
+	found_msk = 0;
+	hashmap__for_each_key_entry(map, entry, k2) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (8 | 16 | 32),
+		  "invalid k2 values: %lx\n", found_msk))
+		return 1;
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
+
+int test_hashmap_empty()
+{
+	struct hashmap_entry *entry;
+	int bkt;
+	struct hashmap *map;
+	void *k = (void *)0;
+
+	fprintf(stderr, "%s: ", __func__);
+
+	/* force collisions */
+	map = hashmap__new(hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
+		return 1;
+
+	if (CHECK(hashmap__size(map) != 0,
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != 0,
+		  "invalid map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+	if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n"))
+		return 1;
+	if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n"))
+		return 1;
+
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "unexpected iterated entry\n");
+		return 1;
+	}
+	hashmap__for_each_key_entry(map, entry, k) {
+		CHECK(false, "unexpected key entry\n");
+		return 1;
+	}
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	bool failed = false;
+
+	if (test_hashmap_generic())
+		failed = true;
+	if (test_hashmap_multimap())
+		failed = true;
+	if (test_hashmap_empty())
+		failed = true;
+
+	return failed;
+}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index a3fbc571280a..5443b9bd75ed 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1418,7 +1418,7 @@ static void test_map_wronly(void)
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
 }
 
-static void prepare_reuseport_grp(int type, int map_fd,
+static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
 				  __s64 *fds64, __u64 *sk_cookies,
 				  unsigned int n)
 {
@@ -1428,6 +1428,8 @@ static void prepare_reuseport_grp(int type, int map_fd,
 	const int optval = 1;
 	unsigned int i;
 	u64 sk_cookie;
+	void *value;
+	__s32 fd32;
 	__s64 fd64;
 	int err;
 
@@ -1449,8 +1451,14 @@ static void prepare_reuseport_grp(int type, int map_fd,
 		      "err:%d errno:%d\n", err, errno);
 
 		/* reuseport_array does not allow unbound sk */
-		err = bpf_map_update_elem(map_fd, &index0, &fd64,
-					  BPF_ANY);
+		if (map_elem_size == sizeof(__u64))
+			value = &fd64;
+		else {
+			assert(map_elem_size == sizeof(__u32));
+			fd32 = (__s32)fd64;
+			value = &fd32;
+		}
+		err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
 		CHECK(err != -1 || errno != EINVAL,
 		      "reuseport array update unbound sk",
 		      "sock_type:%d err:%d errno:%d\n",
@@ -1478,7 +1486,7 @@ static void prepare_reuseport_grp(int type, int map_fd,
 			 * reuseport_array does not allow
 			 * non-listening tcp sk.
 			 */
-			err = bpf_map_update_elem(map_fd, &index0, &fd64,
+			err = bpf_map_update_elem(map_fd, &index0, value,
 						  BPF_ANY);
 			CHECK(err != -1 || errno != EINVAL,
 			      "reuseport array update non-listening sk",
@@ -1541,7 +1549,7 @@ static void test_reuseport_array(void)
 	for (t = 0; t < ARRAY_SIZE(types); t++) {
 		type = types[t];
 
-		prepare_reuseport_grp(type, map_fd, grpa_fds64,
+		prepare_reuseport_grp(type, map_fd, sizeof(__u64), grpa_fds64,
 				      grpa_cookies, ARRAY_SIZE(grpa_fds64));
 
 		/* Test BPF_* update flags */
@@ -1649,7 +1657,8 @@ static void test_reuseport_array(void)
 				sizeof(__u32), sizeof(__u32), array_size, 0);
 	CHECK(map_fd == -1, "reuseport array create",
 	      "map_fd:%d, errno:%d\n", map_fd, errno);
-	prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
+	prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
+			      &sk_cookie, 1);
 	fd = fd64;
 	err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
 	CHECK(err == -1, "reuseport array update 32 bit fd",
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
index 295b9b3bc5c7..0e014d3b2b36 100644
--- a/tools/testing/selftests/bpf/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/test_queue_stack_map.h
@@ -10,21 +10,21 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = {
-	.type = MAP_TYPE,
-	.key_size = 0,
-	.value_size = sizeof(__u32),
-	.max_entries = 32,
-	.map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = {
-	.type = MAP_TYPE,
-	.key_size = 0,
-	.value_size = sizeof(__u32),
-	.max_entries = 32,
-	.map_flags = 0,
-};
+struct {
+	__uint(type, MAP_TYPE);
+	__uint(max_entries, 32);
+	__uint(map_flags, 0);
+	__uint(key_size, 0);
+	__uint(value_size, sizeof(__u32));
+} map_in SEC(".maps");
+
+struct {
+	__uint(type, MAP_TYPE);
+	__uint(max_entries, 32);
+	__uint(map_flags, 0);
+	__uint(key_size, 0);
+	__uint(value_size, sizeof(__u32));
+} map_out SEC(".maps");
 
 SEC("test")
 int _test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
index dee2f2eceb0f..29833aeaf0de 100644
--- a/tools/testing/selftests/bpf/test_section_names.c
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -134,6 +134,16 @@ static struct sec_name_test tests[] = {
 		{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
 		{0, BPF_CGROUP_SYSCTL},
 	},
+	{
+		"cgroup/getsockopt",
+		{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT},
+		{0, BPF_CGROUP_GETSOCKOPT},
+	},
+	{
+		"cgroup/setsockopt",
+		{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
+		{0, BPF_CGROUP_SETSOCKOPT},
+	},
 };
 
 static int test_prog_type_by_name(const struct sec_name_test *test)
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
index 75646d9b34aa..7566c13eb51a 100644
--- a/tools/testing/selftests/bpf/test_select_reuseport.c
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -523,6 +523,58 @@ static void test_pass_on_err(int type, sa_family_t family)
 	printf("OK\n");
 }
 
+static void test_detach_bpf(int type, sa_family_t family)
+{
+#ifdef SO_DETACH_REUSEPORT_BPF
+	__u32 nr_run_before = 0, nr_run_after = 0, tmp, i;
+	struct epoll_event ev;
+	int cli_fd, err, nev;
+	struct cmd cmd = {};
+	int optvalue = 0;
+
+	printf("%s: ", __func__);
+	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
+			 &optvalue, sizeof(optvalue));
+	CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+	      "err:%d errno:%d\n", err, errno);
+
+	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
+			 &optvalue, sizeof(optvalue));
+	CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+	      "err:%d errno:%d\n", err, errno);
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		err = bpf_map_lookup_elem(result_map, &i, &tmp);
+		CHECK(err == -1, "lookup_elem(result_map)",
+		      "i:%u err:%d errno:%d\n", i, err, errno);
+		nr_run_before += tmp;
+	}
+
+	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
+	nev = epoll_wait(epfd, &ev, 1, 5);
+	CHECK(nev <= 0, "nev <= 0",
+	      "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
+	      nev,  type, family);
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		err = bpf_map_lookup_elem(result_map, &i, &tmp);
+		CHECK(err == -1, "lookup_elem(result_map)",
+		      "i:%u err:%d errno:%d\n", i, err, errno);
+		nr_run_after += tmp;
+	}
+
+	CHECK(nr_run_before != nr_run_after,
+	      "nr_run_before != nr_run_after",
+	      "nr_run_before:%u nr_run_after:%u\n",
+	      nr_run_before, nr_run_after);
+
+	printf("OK\n");
+	close(cli_fd);
+#else
+	printf("%s: SKIP\n", __func__);
+#endif
+}
+
 static void prepare_sk_fds(int type, sa_family_t family, bool inany)
 {
 	const int first = REUSEPORT_ARRAY_SIZE - 1;
@@ -664,6 +716,8 @@ static void test_all(void)
 			test_pass(type, family);
 			test_syncookie(type, family);
 			test_pass_on_err(type, family);
+			/* Must be the last test */
+			test_detach_bpf(type, family);
 
 			cleanup_per_test();
 			printf("\n");
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 4ecde2392327..61fd95b89af8 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -836,6 +836,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
 	attr.file = path;
 	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
 	attr.expected_attach_type = test->expected_attach_type;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
 
 	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
 		if (test->expected_result != LOAD_REJECT)
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index e089477fa0a3..f0fc103261a4 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -414,6 +414,7 @@ int main(int argc, char **argv)
 	struct bpf_prog_load_attr attr = {
 		.file = "test_sock_fields_kern.o",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+		.prog_flags = BPF_F_TEST_RND_HI32,
 	};
 	int cgroup_fd, egress_fd, ingress_fd, err;
 	struct bpf_program *ingress_prog;
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index e51d63786ff8..15653b0e26eb 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -18,6 +18,11 @@
 #define CG_PATH			"/foo"
 #define SOCKET_COOKIE_PROG	"./socket_cookie_prog.o"
 
+struct socket_cookie {
+	__u64 cookie_key;
+	__u32 cookie_value;
+};
+
 static int start_server(void)
 {
 	struct sockaddr_in6 addr;
@@ -89,8 +94,7 @@ static int validate_map(struct bpf_map *map, int client_fd)
 	__u32 cookie_expected_value;
 	struct sockaddr_in6 addr;
 	socklen_t len = sizeof(addr);
-	__u32 cookie_value;
-	__u64 cookie_key;
+	struct socket_cookie val;
 	int err = 0;
 	int map_fd;
 
@@ -101,17 +105,7 @@ static int validate_map(struct bpf_map *map, int client_fd)
 
 	map_fd = bpf_map__fd(map);
 
-	err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
-	if (err) {
-		log_err("Can't get cookie key from map");
-		goto out;
-	}
-
-	err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
-	if (err) {
-		log_err("Can't get cookie value from map");
-		goto out;
-	}
+	err = bpf_map_lookup_elem(map_fd, &client_fd, &val);
 
 	err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
 	if (err) {
@@ -120,8 +114,8 @@ static int validate_map(struct bpf_map *map, int client_fd)
 	}
 
 	cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
-	if (cookie_value != cookie_expected_value) {
-		log_err("Unexpected value in map: %x != %x", cookie_value,
+	if (val.cookie_value != cookie_expected_value) {
+		log_err("Unexpected value in map: %x != %x", val.cookie_value,
 			cookie_expected_value);
 		goto err;
 	}
@@ -148,6 +142,7 @@ static int run_test(int cgfd)
 	memset(&attr, 0, sizeof(attr));
 	attr.file = SOCKET_COOKIE_PROG;
 	attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
 
 	err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
 	if (err) {
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index e7639f66a941..d008b41b7d8d 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -28,68 +28,61 @@
  * are established and verdicts are decided.
  */
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
-struct bpf_map_def SEC("maps") sock_map = {
-	.type = TEST_MAP_TYPE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_txmsg = {
-	.type = TEST_MAP_TYPE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_redir = {
-	.type = TEST_MAP_TYPE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_apply_bytes = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_cork_bytes = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_bytes = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 6
-};
-
-struct bpf_map_def SEC("maps") sock_redir_flags = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_skb_opts = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map SEC(".maps");
+
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_txmsg SEC(".maps");
+
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_redir SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_apply_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_cork_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 6);
+	__type(key, int);
+	__type(value, int);
+} sock_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_redir_flags SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_skb_opts SEC(".maps");
 
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/test_sockopt.c b/tools/testing/selftests/bpf/test_sockopt.c
new file mode 100644
index 000000000000..23bd0819382d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt.c
@@ -0,0 +1,1021 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH				"/sockopt"
+
+static char bpf_log_buf[4096];
+static bool verbose;
+
+enum sockopt_test_error {
+	OK = 0,
+	DENY_LOAD,
+	DENY_ATTACH,
+	EPERM_GETSOCKOPT,
+	EFAULT_GETSOCKOPT,
+	EPERM_SETSOCKOPT,
+	EFAULT_SETSOCKOPT,
+};
+
+static struct sockopt_test {
+	const char			*descr;
+	const struct bpf_insn		insns[64];
+	enum bpf_attach_type		attach_type;
+	enum bpf_attach_type		expected_attach_type;
+
+	int				set_optname;
+	int				set_level;
+	const char			set_optval[64];
+	socklen_t			set_optlen;
+
+	int				get_optname;
+	int				get_level;
+	const char			get_optval[64];
+	socklen_t			get_optlen;
+	socklen_t			get_optlen_ret;
+
+	enum sockopt_test_error		error;
+} tests[] = {
+
+	/* ==================== getsockopt ====================  */
+
+	{
+		.descr = "getsockopt: no expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = 0,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: wrong expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+		.error = DENY_ATTACH,
+	},
+	{
+		.descr = "getsockopt: bypass bpf hook",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: return EPERM from bpf hook",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.get_optname = IP_TOS,
+
+		.get_optlen = 1,
+		.error = EPERM_GETSOCKOPT,
+	},
+	{
+		.descr = "getsockopt: no optval bounds check, deny loading",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+
+			/* ctx->optval[0] = 0x80 */
+			BPF_MOV64_IMM(BPF_REG_0, 0x80),
+			BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: read ctx->level",
+		.insns = {
+			/* r6 = ctx->level */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, level)),
+
+			/* if (ctx->level == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = 123,
+
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->level",
+		.insns = {
+			/* ctx->level = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, level)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: read ctx->optname",
+		.insns = {
+			/* r6 = ctx->optname */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optname)),
+
+			/* if (ctx->optname == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optname = 123,
+
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: read ctx->retval",
+		.insns = {
+			/* r6 = ctx->retval */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.get_optname = IP_TOS,
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->optname",
+		.insns = {
+			/* ctx->optname = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optname)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: read ctx->optlen",
+		.insns = {
+			/* r6 = ctx->optlen */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* if (ctx->optlen == 64) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+	},
+	{
+		.descr = "getsockopt: deny bigger ctx->optlen",
+		.insns = {
+			/* ctx->optlen = 65 */
+			BPF_MOV64_IMM(BPF_REG_0, 65),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+
+		.error = EFAULT_GETSOCKOPT,
+	},
+	{
+		.descr = "getsockopt: deny arbitrary ctx->retval",
+		.insns = {
+			/* ctx->retval = 123 */
+			BPF_MOV64_IMM(BPF_REG_0, 123),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+
+		.error = EFAULT_GETSOCKOPT,
+	},
+	{
+		.descr = "getsockopt: support smaller ctx->optlen",
+		.insns = {
+			/* ctx->optlen = 32 */
+			BPF_MOV64_IMM(BPF_REG_0, 32),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+		.get_optlen_ret = 32,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->optval",
+		.insns = {
+			/* ctx->optval = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->optval_end",
+		.insns = {
+			/* ctx->optval_end = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval_end)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: rewrite value",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r2 = ctx->optval */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+			/* r6 = ctx->optval + 1 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+			/* r7 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+			/* ctx->optval[0] = 0xF0 */
+			BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0),
+			/* } */
+
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1*/
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.get_optname = IP_TOS,
+
+		.get_optval = { 0xF0 },
+		.get_optlen = 1,
+	},
+
+	/* ==================== setsockopt ====================  */
+
+	{
+		.descr = "setsockopt: no expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = 0,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: wrong expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+		.error = DENY_ATTACH,
+	},
+	{
+		.descr = "setsockopt: bypass bpf hook",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: return EPERM from bpf hook",
+		.insns = {
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_level = SOL_IP,
+		.set_optname = IP_TOS,
+
+		.set_optlen = 1,
+		.error = EPERM_SETSOCKOPT,
+	},
+	{
+		.descr = "setsockopt: no optval bounds check, deny loading",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+
+			/* r0 = ctx->optval[0] */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: read ctx->level",
+		.insns = {
+			/* r6 = ctx->level */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, level)),
+
+			/* if (ctx->level == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_level = 123,
+
+		.set_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: allow changing ctx->level",
+		.insns = {
+			/* ctx->level = SOL_IP */
+			BPF_MOV64_IMM(BPF_REG_0, SOL_IP),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, level)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = 234, /* should be rewritten to SOL_IP */
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: read ctx->optname",
+		.insns = {
+			/* r6 = ctx->optname */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optname)),
+
+			/* if (ctx->optname == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optname = 123,
+
+		.set_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: allow changing ctx->optname",
+		.insns = {
+			/* ctx->optname = IP_TOS */
+			BPF_MOV64_IMM(BPF_REG_0, IP_TOS),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optname)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = 456, /* should be rewritten to IP_TOS */
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: read ctx->optlen",
+		.insns = {
+			/* r6 = ctx->optlen */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* if (ctx->optlen == 64) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4),
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 64,
+	},
+	{
+		.descr = "setsockopt: ctx->optlen == -1 is ok",
+		.insns = {
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 64,
+	},
+	{
+		.descr = "setsockopt: deny ctx->optlen < 0 (except -1)",
+		.insns = {
+			/* ctx->optlen = -2 */
+			BPF_MOV64_IMM(BPF_REG_0, -2),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 4,
+
+		.error = EFAULT_SETSOCKOPT,
+	},
+	{
+		.descr = "setsockopt: deny ctx->optlen > input optlen",
+		.insns = {
+			/* ctx->optlen = 65 */
+			BPF_MOV64_IMM(BPF_REG_0, 65),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 64,
+
+		.error = EFAULT_SETSOCKOPT,
+	},
+	{
+		.descr = "setsockopt: allow changing ctx->optlen within bounds",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r2 = ctx->optval */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+			/* r6 = ctx->optval + 1 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+			/* r7 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+			/* ctx->optval[0] = 1 << 3 */
+			BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 1 << 3),
+			/* } */
+
+			/* ctx->optlen = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* return 1*/
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1, 1, 1, 1 },
+		.set_optlen = 4,
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: deny write ctx->retval",
+		.insns = {
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: deny read ctx->retval",
+		.insns = {
+			/* r6 = ctx->retval */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: deny writing to ctx->optval",
+		.insns = {
+			/* ctx->optval = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: deny writing to ctx->optval_end",
+		.insns = {
+			/* ctx->optval_end = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval_end)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: allow IP_TOS <= 128",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r7 = ctx->optval + 1 */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1),
+
+			/* r8 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4),
+
+			/* r9 = ctx->optval[0] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0),
+
+			/* if (ctx->optval[0] < 128) */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } */
+
+			/* } else { */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 0x80 },
+		.set_optlen = 1,
+		.get_optval = { 0x80 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: deny IP_TOS > 128",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r7 = ctx->optval + 1 */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1),
+
+			/* r8 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4),
+
+			/* r9 = ctx->optval[0] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0),
+
+			/* if (ctx->optval[0] < 128) */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } */
+
+			/* } else { */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 0x81 },
+		.set_optlen = 1,
+		.get_optval = { 0x00 },
+		.get_optlen = 1,
+
+		.error = EPERM_SETSOCKOPT,
+	},
+};
+
+static int load_prog(const struct bpf_insn *insns,
+		     enum bpf_attach_type expected_attach_type)
+{
+	struct bpf_load_program_attr attr = {
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+		.expected_attach_type = expected_attach_type,
+		.insns = insns,
+		.license = "GPL",
+		.log_level = 2,
+	};
+	int fd;
+
+	for (;
+	     insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT);
+	     attr.insns_cnt++) {
+	}
+	attr.insns_cnt++;
+
+	fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf));
+	if (verbose && fd < 0)
+		fprintf(stderr, "%s\n", bpf_log_buf);
+
+	return fd;
+}
+
+static int run_test(int cgroup_fd, struct sockopt_test *test)
+{
+	int sock_fd, err, prog_fd;
+	void *optval = NULL;
+	int ret = 0;
+
+	prog_fd = load_prog(test->insns, test->expected_attach_type);
+	if (prog_fd < 0) {
+		if (test->error == DENY_LOAD)
+			return 0;
+
+		log_err("Failed to load BPF program");
+		return -1;
+	}
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+	if (err < 0) {
+		if (test->error == DENY_ATTACH)
+			goto close_prog_fd;
+
+		log_err("Failed to attach BPF program");
+		ret = -1;
+		goto close_prog_fd;
+	}
+
+	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		log_err("Failed to create AF_INET socket");
+		ret = -1;
+		goto detach_prog;
+	}
+
+	if (test->set_optlen) {
+		err = setsockopt(sock_fd, test->set_level, test->set_optname,
+				 test->set_optval, test->set_optlen);
+		if (err) {
+			if (errno == EPERM && test->error == EPERM_SETSOCKOPT)
+				goto close_sock_fd;
+			if (errno == EFAULT && test->error == EFAULT_SETSOCKOPT)
+				goto free_optval;
+
+			log_err("Failed to call setsockopt");
+			ret = -1;
+			goto close_sock_fd;
+		}
+	}
+
+	if (test->get_optlen) {
+		optval = malloc(test->get_optlen);
+		socklen_t optlen = test->get_optlen;
+		socklen_t expected_get_optlen = test->get_optlen_ret ?:
+			test->get_optlen;
+
+		err = getsockopt(sock_fd, test->get_level, test->get_optname,
+				 optval, &optlen);
+		if (err) {
+			if (errno == EPERM && test->error == EPERM_GETSOCKOPT)
+				goto free_optval;
+			if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT)
+				goto free_optval;
+
+			log_err("Failed to call getsockopt");
+			ret = -1;
+			goto free_optval;
+		}
+
+		if (optlen != expected_get_optlen) {
+			errno = 0;
+			log_err("getsockopt returned unexpected optlen");
+			ret = -1;
+			goto free_optval;
+		}
+
+		if (memcmp(optval, test->get_optval, optlen) != 0) {
+			errno = 0;
+			log_err("getsockopt returned unexpected optval");
+			ret = -1;
+			goto free_optval;
+		}
+	}
+
+	ret = test->error != OK;
+
+free_optval:
+	free(optval);
+close_sock_fd:
+	close(sock_fd);
+detach_prog:
+	bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+close_prog_fd:
+	close(prog_fd);
+	return ret;
+}
+
+int main(int args, char **argv)
+{
+	int err = EXIT_FAILURE, error_cnt = 0;
+	int cgroup_fd, i;
+
+	if (setup_cgroup_environment())
+		goto cleanup_obj;
+
+	cgroup_fd = create_and_get_cgroup(CG_PATH);
+	if (cgroup_fd < 0)
+		goto cleanup_cgroup_env;
+
+	if (join_cgroup(CG_PATH))
+		goto cleanup_cgroup;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		int err = run_test(cgroup_fd, &tests[i]);
+
+		if (err)
+			error_cnt++;
+
+		printf("#%d %s: %s\n", i, err ? "FAIL" : "PASS",
+		       tests[i].descr);
+	}
+
+	printf("Summary: %ld PASSED, %d FAILED\n",
+	       ARRAY_SIZE(tests) - error_cnt, error_cnt);
+	err = error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+
+cleanup_cgroup:
+	close(cgroup_fd);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+cleanup_obj:
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_multi.c b/tools/testing/selftests/bpf/test_sockopt_multi.c
new file mode 100644
index 000000000000..4be3441db867
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_multi.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+	enum bpf_attach_type attach_type;
+	enum bpf_prog_type prog_type;
+	struct bpf_program *prog;
+	int err;
+
+	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+	if (err) {
+		log_err("Failed to deduct types for %s BPF program", title);
+		return -1;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, title);
+	if (!prog) {
+		log_err("Failed to find %s BPF program", title);
+		return -1;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+			      attach_type, BPF_F_ALLOW_MULTI);
+	if (err) {
+		log_err("Failed to attach %s BPF program", title);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+	enum bpf_attach_type attach_type;
+	enum bpf_prog_type prog_type;
+	struct bpf_program *prog;
+	int err;
+
+	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+	if (err)
+		return -1;
+
+	prog = bpf_object__find_program_by_title(obj, title);
+	if (!prog)
+		return -1;
+
+	err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd,
+			       attach_type);
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
+			       int cg_child, int sock_fd)
+{
+	socklen_t optlen;
+	__u8 buf;
+	int err;
+
+	/* Set IP_TOS to the expected value (0x80). */
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach child program and make sure it returns new value:
+	 * - kernel:      -> 0x80
+	 * - child:  0x80 -> 0x90
+	 */
+
+	err = prog_attach(obj, cg_child, "cgroup/getsockopt/child");
+	if (err)
+		goto detach;
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x90) {
+		log_err("Unexpected getsockopt 0x%x != 0x90", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach parent program and make sure it returns new value:
+	 * - kernel:      -> 0x80
+	 * - child:  0x80 -> 0x90
+	 * - parent: 0x90 -> 0xA0
+	 */
+
+	err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent");
+	if (err)
+		goto detach;
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0xA0) {
+		log_err("Unexpected getsockopt 0x%x != 0xA0", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Setting unexpected initial sockopt should return EPERM:
+	 * - kernel: -> 0x40
+	 * - child:  unexpected 0x40, EPERM
+	 * - parent: unexpected 0x40, EPERM
+	 */
+
+	buf = 0x40;
+	if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (!err) {
+		log_err("Unexpected success from getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	/* Detach child program and make sure we still get EPERM:
+	 * - kernel: -> 0x40
+	 * - parent: unexpected 0x40, EPERM
+	 */
+
+	err = prog_detach(obj, cg_child, "cgroup/getsockopt/child");
+	if (err) {
+		log_err("Failed to detach child program");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (!err) {
+		log_err("Unexpected success from getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	/* Set initial value to the one the parent program expects:
+	 * - kernel:      -> 0x90
+	 * - parent: 0x90 -> 0xA0
+	 */
+
+	buf = 0x90;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0xA0) {
+		log_err("Unexpected getsockopt 0x%x != 0xA0", buf);
+		err = -1;
+		goto detach;
+	}
+
+detach:
+	prog_detach(obj, cg_child, "cgroup/getsockopt/child");
+	prog_detach(obj, cg_parent, "cgroup/getsockopt/parent");
+
+	return err;
+}
+
+static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
+			       int cg_child, int sock_fd)
+{
+	socklen_t optlen;
+	__u8 buf;
+	int err;
+
+	/* Set IP_TOS to the expected value (0x80). */
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach child program and make sure it adds 0x10. */
+
+	err = prog_attach(obj, cg_child, "cgroup/setsockopt");
+	if (err)
+		goto detach;
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80 + 0x10) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 + 0x10", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach parent program and make sure it adds another 0x10. */
+
+	err = prog_attach(obj, cg_parent, "cgroup/setsockopt");
+	if (err)
+		goto detach;
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80 + 2 * 0x10) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 + 2 * 0x10", buf);
+		err = -1;
+		goto detach;
+	}
+
+detach:
+	prog_detach(obj, cg_child, "cgroup/setsockopt");
+	prog_detach(obj, cg_parent, "cgroup/setsockopt");
+
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	struct bpf_prog_load_attr attr = {
+		.file = "./sockopt_multi.o",
+	};
+	int cg_parent = -1, cg_child = -1;
+	struct bpf_object *obj = NULL;
+	int sock_fd = -1;
+	int err = -1;
+	int ignored;
+
+	if (setup_cgroup_environment()) {
+		log_err("Failed to setup cgroup environment\n");
+		goto out;
+	}
+
+	cg_parent = create_and_get_cgroup("/parent");
+	if (cg_parent < 0) {
+		log_err("Failed to create cgroup /parent\n");
+		goto out;
+	}
+
+	cg_child = create_and_get_cgroup("/parent/child");
+	if (cg_child < 0) {
+		log_err("Failed to create cgroup /parent/child\n");
+		goto out;
+	}
+
+	if (join_cgroup("/parent/child")) {
+		log_err("Failed to join cgroup /parent/child\n");
+		goto out;
+	}
+
+	err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+	if (err) {
+		log_err("Failed to load BPF object");
+		goto out;
+	}
+
+	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		log_err("Failed to create socket");
+		goto out;
+	}
+
+	if (run_getsockopt_test(obj, cg_parent, cg_child, sock_fd))
+		err = -1;
+	printf("test_sockopt_multi: getsockopt %s\n",
+	       err ? "FAILED" : "PASSED");
+
+	if (run_setsockopt_test(obj, cg_parent, cg_child, sock_fd))
+		err = -1;
+	printf("test_sockopt_multi: setsockopt %s\n",
+	       err ? "FAILED" : "PASSED");
+
+out:
+	close(sock_fd);
+	bpf_object__close(obj);
+	close(cg_child);
+	close(cg_parent);
+
+	printf("test_sockopt_multi: %s\n", err ? "FAILED" : "PASSED");
+	return err ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/test_sockopt_sk.c
new file mode 100644
index 000000000000..036b652e5ca9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_sk.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH				"/sockopt"
+
+#define SOL_CUSTOM			0xdeadbeef
+
+static int getsetsockopt(void)
+{
+	int fd, err;
+	union {
+		char u8[4];
+		__u32 u32;
+	} buf = {};
+	socklen_t optlen;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create socket");
+		return -1;
+	}
+
+	/* IP_TOS - BPF bypass */
+
+	buf.u8[0] = 0x08;
+	err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto err;
+	}
+
+	buf.u8[0] = 0x00;
+	optlen = 1;
+	err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto err;
+	}
+
+	if (buf.u8[0] != 0x08) {
+		log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08",
+			buf.u8[0]);
+		goto err;
+	}
+
+	/* IP_TTL - EPERM */
+
+	buf.u8[0] = 1;
+	err = setsockopt(fd, SOL_IP, IP_TTL, &buf, 1);
+	if (!err || errno != EPERM) {
+		log_err("Unexpected success from setsockopt(IP_TTL)");
+		goto err;
+	}
+
+	/* SOL_CUSTOM - handled by BPF */
+
+	buf.u8[0] = 0x01;
+	err = setsockopt(fd, SOL_CUSTOM, 0, &buf, 1);
+	if (err) {
+		log_err("Failed to call setsockopt");
+		goto err;
+	}
+
+	buf.u32 = 0x00;
+	optlen = 4;
+	err = getsockopt(fd, SOL_CUSTOM, 0, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt");
+		goto err;
+	}
+
+	if (optlen != 1) {
+		log_err("Unexpected optlen %d != 1", optlen);
+		goto err;
+	}
+	if (buf.u8[0] != 0x01) {
+		log_err("Unexpected buf[0] 0x%02x != 0x01", buf.u8[0]);
+		goto err;
+	}
+
+	/* SO_SNDBUF is overwritten */
+
+	buf.u32 = 0x01010101;
+	err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, 4);
+	if (err) {
+		log_err("Failed to call setsockopt(SO_SNDBUF)");
+		goto err;
+	}
+
+	buf.u32 = 0x00;
+	optlen = 4;
+	err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(SO_SNDBUF)");
+		goto err;
+	}
+
+	if (buf.u32 != 0x55AA*2) {
+		log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2",
+			buf.u32);
+		goto err;
+	}
+
+	close(fd);
+	return 0;
+err:
+	close(fd);
+	return -1;
+}
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+	enum bpf_attach_type attach_type;
+	enum bpf_prog_type prog_type;
+	struct bpf_program *prog;
+	int err;
+
+	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+	if (err) {
+		log_err("Failed to deduct types for %s BPF program", title);
+		return -1;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, title);
+	if (!prog) {
+		log_err("Failed to find %s BPF program", title);
+		return -1;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+			      attach_type, 0);
+	if (err) {
+		log_err("Failed to attach %s BPF program", title);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int run_test(int cgroup_fd)
+{
+	struct bpf_prog_load_attr attr = {
+		.file = "./sockopt_sk.o",
+	};
+	struct bpf_object *obj;
+	int ignored;
+	int err;
+
+	err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+	if (err)
+		goto close_bpf_object;
+
+	err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+	if (err)
+		goto close_bpf_object;
+
+	err = getsetsockopt();
+
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+int main(int args, char **argv)
+{
+	int cgroup_fd;
+	int err = EXIT_SUCCESS;
+
+	if (setup_cgroup_environment())
+		goto cleanup_obj;
+
+	cgroup_fd = create_and_get_cgroup(CG_PATH);
+	if (cgroup_fd < 0)
+		goto cleanup_cgroup_env;
+
+	if (join_cgroup(CG_PATH))
+		goto cleanup_cgroup;
+
+	if (run_test(cgroup_fd))
+		err = EXIT_FAILURE;
+
+	printf("test_sockopt_sk: %s\n",
+	       err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+	close(cgroup_fd);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+cleanup_obj:
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c
new file mode 100644
index 000000000000..84e81a89e2f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stub.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <string.h>
+
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+		       struct bpf_object **pobj, int *prog_fd)
+{
+	struct bpf_prog_load_attr attr;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = file;
+	attr.prog_type = type;
+	attr.expected_attach_type = 0;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
+
+	return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+}
+
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+			  size_t insns_cnt, const char *license,
+			  __u32 kern_version, char *log_buf,
+		     size_t log_buf_sz)
+{
+	struct bpf_load_program_attr load_attr;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = type;
+	load_attr.expected_attach_type = 0;
+	load_attr.name = NULL;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = license;
+	load_attr.kern_version = kern_version;
+	load_attr.prog_flags = BPF_F_TEST_RND_HI32;
+
+	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+}
diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/test_tcp_rtt.c
new file mode 100644
index 000000000000..90c3862f74a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_rtt.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pthread.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH                                "/tcp_rtt"
+
+struct tcp_rtt_storage {
+	__u32 invoked;
+	__u32 dsack_dups;
+	__u32 delivered;
+	__u32 delivered_ce;
+	__u32 icsk_retransmits;
+};
+
+static void send_byte(int fd)
+{
+	char b = 0x55;
+
+	if (write(fd, &b, sizeof(b)) != 1)
+		error(1, errno, "Failed to send single byte");
+}
+
+static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
+		     __u32 dsack_dups, __u32 delivered, __u32 delivered_ce,
+		     __u32 icsk_retransmits)
+{
+	int err = 0;
+	struct tcp_rtt_storage val;
+
+	if (bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)
+		error(1, errno, "Failed to read socket storage");
+
+	if (val.invoked != invoked) {
+		log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d",
+			msg, val.invoked, invoked);
+		err++;
+	}
+
+	if (val.dsack_dups != dsack_dups) {
+		log_err("%s: unexpected bpf_tcp_sock.dsack_dups %d != %d",
+			msg, val.dsack_dups, dsack_dups);
+		err++;
+	}
+
+	if (val.delivered != delivered) {
+		log_err("%s: unexpected bpf_tcp_sock.delivered %d != %d",
+			msg, val.delivered, delivered);
+		err++;
+	}
+
+	if (val.delivered_ce != delivered_ce) {
+		log_err("%s: unexpected bpf_tcp_sock.delivered_ce %d != %d",
+			msg, val.delivered_ce, delivered_ce);
+		err++;
+	}
+
+	if (val.icsk_retransmits != icsk_retransmits) {
+		log_err("%s: unexpected bpf_tcp_sock.icsk_retransmits %d != %d",
+			msg, val.icsk_retransmits, icsk_retransmits);
+		err++;
+	}
+
+	return err;
+}
+
+static int connect_to_server(int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
+		return -1;
+	}
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		goto out;
+	}
+
+	if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+		log_err("Fail to connect to server");
+		goto out;
+	}
+
+	return fd;
+
+out:
+	close(fd);
+	return -1;
+}
+
+static int run_test(int cgroup_fd, int server_fd)
+{
+	struct bpf_prog_load_attr attr = {
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+		.file = "./tcp_rtt.o",
+		.expected_attach_type = BPF_CGROUP_SOCK_OPS,
+	};
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	int client_fd;
+	int prog_fd;
+	int map_fd;
+	int err;
+
+	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	map = bpf_map__next(NULL, obj);
+	map_fd = bpf_map__fd(map);
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	client_fd = connect_to_server(server_fd);
+	if (client_fd < 0) {
+		err = -1;
+		goto close_bpf_object;
+	}
+
+	err += verify_sk(map_fd, client_fd, "syn-ack",
+			 /*invoked=*/1,
+			 /*dsack_dups=*/0,
+			 /*delivered=*/1,
+			 /*delivered_ce=*/0,
+			 /*icsk_retransmits=*/0);
+
+	send_byte(client_fd);
+
+	err += verify_sk(map_fd, client_fd, "first payload byte",
+			 /*invoked=*/2,
+			 /*dsack_dups=*/0,
+			 /*delivered=*/2,
+			 /*delivered_ce=*/0,
+			 /*icsk_retransmits=*/0);
+
+	close(client_fd);
+
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+static int start_server(void)
+{
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+	};
+	int fd;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create server socket");
+		return -1;
+	}
+
+	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		log_err("Failed to bind socket");
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+static void *server_thread(void *arg)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd = *(int *)arg;
+	int client_fd;
+
+	if (listen(fd, 1) < 0)
+		error(1, errno, "Failed to listed on socket");
+
+	client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+	if (client_fd < 0)
+		error(1, errno, "Failed to accept client");
+
+	/* Wait for the next connection (that never arrives)
+	 * to keep this thread alive to prevent calling
+	 * close() on client_fd.
+	 */
+	if (accept(fd, (struct sockaddr *)&addr, &len) >= 0)
+		error(1, errno, "Unexpected success in second accept");
+
+	close(client_fd);
+
+	return NULL;
+}
+
+int main(int args, char **argv)
+{
+	int server_fd, cgroup_fd;
+	int err = EXIT_SUCCESS;
+	pthread_t tid;
+
+	if (setup_cgroup_environment())
+		goto cleanup_obj;
+
+	cgroup_fd = create_and_get_cgroup(CG_PATH);
+	if (cgroup_fd < 0)
+		goto cleanup_cgroup_env;
+
+	if (join_cgroup(CG_PATH))
+		goto cleanup_cgroup;
+
+	server_fd = start_server();
+	if (server_fd < 0) {
+		err = EXIT_FAILURE;
+		goto cleanup_cgroup;
+	}
+
+	pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+
+	if (run_test(cgroup_fd, server_fd))
+		err = EXIT_FAILURE;
+
+	close(server_fd);
+
+	printf("test_sockopt_sk: %s\n",
+	       err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+	close(cgroup_fd);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+cleanup_obj:
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 546aee3e9fb4..bd12ec97a44d 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -696,30 +696,57 @@ check_err()
 
 bpf_tunnel_test()
 {
+	local errors=0
+
 	echo "Testing GRE tunnel..."
 	test_gre
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6GRE tunnel..."
 	test_ip6gre
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6GRETAP tunnel..."
 	test_ip6gretap
+	errors=$(( $errors + $? ))
+
 	echo "Testing ERSPAN tunnel..."
 	test_erspan v2
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6ERSPAN tunnel..."
 	test_ip6erspan v2
+	errors=$(( $errors + $? ))
+
 	echo "Testing VXLAN tunnel..."
 	test_vxlan
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6VXLAN tunnel..."
 	test_ip6vxlan
+	errors=$(( $errors + $? ))
+
 	echo "Testing GENEVE tunnel..."
 	test_geneve
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6GENEVE tunnel..."
 	test_ip6geneve
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPIP tunnel..."
 	test_ipip
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPIP6 tunnel..."
 	test_ipip6
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPSec tunnel..."
 	test_xfrm_tunnel
+	errors=$(( $errors + $? ))
+
+	return $errors
 }
 
 trap cleanup 0 3 6
@@ -728,4 +755,9 @@ trap cleanup_exit 2 9
 cleanup
 bpf_tunnel_test
 
+if [ $? -ne 0 ]; then
+	echo -e "$(basename $0): ${RED}FAIL${NC}"
+	exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
 exit 0
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 288cb740e005..b0773291012a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -105,6 +105,7 @@ struct bpf_test {
 			__u64 data64[TEST_DATA_LEN / 8];
 		};
 	} retvals[MAX_TEST_RUNS];
+	enum bpf_attach_type expected_attach_type;
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -135,32 +136,36 @@ static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
 loop:
 	for (j = 0; j < PUSH_CNT; j++) {
 		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		/* jump to error label */
+		insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3);
 		i++;
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
 		insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
 		insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_skb_vlan_push),
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
 		i++;
 	}
 
 	for (j = 0; j < PUSH_CNT; j++) {
 		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3);
 		i++;
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_skb_vlan_pop),
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
 		i++;
 	}
 	if (++k < 5)
 		goto loop;
 
-	for (; i < len - 1; i++)
-		insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+	for (; i < len - 3; i++)
+		insn[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+	insn[len - 3] = BPF_JMP_A(1);
+	/* error label */
+	insn[len - 2] = BPF_MOV32_IMM(BPF_REG_0, 0);
 	insn[len - 1] = BPF_EXIT_INSN();
 	self->prog_len = len;
 }
@@ -168,8 +173,13 @@ loop:
 static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
-	/* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns */
-	unsigned int len = (1 << 15) / 6;
+	/* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns,
+	 * but on arches like arm, ppc etc, there will be one BPF_ZEXT inserted
+	 * to extend the error value of the inlined ld_abs sequence which then
+	 * contains 7 insns. so, set the dividend to 7 so the testcase could
+	 * work on all arches.
+	 */
+	unsigned int len = (1 << 15) / 7;
 	int i = 0;
 
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
@@ -207,33 +217,35 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	self->retval = (uint32_t)res;
 }
 
-/* test the sequence of 1k jumps */
+#define MAX_JMP_SEQ 8192
+
+/* test the sequence of 8k jumps */
 static void bpf_fill_scale1(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
 	int i = 0, k = 0;
 
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % 64 + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	/* is_state_visited() doesn't allocate state for pruning for every jump.
+	 * Hence multiply jmps by 4 to accommodate that heuristic
 	 */
-	while (i < MAX_TEST_INSNS - 1025)
-		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
+		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
 	self->retval = 42;
 }
 
-/* test the sequence of 1k jumps in inner most function (function depth 8)*/
+/* test the sequence of 8k jumps in inner most function (function depth 8)*/
 static void bpf_fill_scale2(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
@@ -245,20 +257,18 @@ static void bpf_fill_scale2(struct bpf_test *self)
 		insn[i++] = BPF_EXIT_INSN();
 	}
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	k = 0;
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % (64 - 4 * FUNC_NEST) + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
-	 */
-	while (i < MAX_TEST_INSNS - 1025)
-		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
+		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
 	self->retval = 42;
@@ -841,6 +851,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int fd_prog, expected_ret, alignment_prevented_execution;
 	int prog_len, prog_type = test->prog_type;
 	struct bpf_insn *prog = test->insns;
+	struct bpf_load_program_attr attr;
 	int run_errs, run_successes;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
@@ -867,13 +878,22 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (fixup_skips != skips)
 		return;
 
-	pflags = 0;
+	pflags = BPF_F_TEST_RND_HI32;
 	if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
 		pflags |= BPF_F_STRICT_ALIGNMENT;
 	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
 		pflags |= BPF_F_ANY_ALIGNMENT;
-	fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
-				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = prog_type;
+	attr.expected_attach_type = test->expected_attach_type;
+	attr.insns = prog;
+	attr.insns_cnt = prog_len;
+	attr.license = "GPL";
+	attr.log_level = 4;
+	attr.prog_flags = pflags;
+
+	fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
 	if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
 		printf("SKIP (unsupported program type %d)\n", prog_type);
 		skips++;
@@ -903,7 +923,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 			printf("FAIL\nUnexpected success to load!\n");
 			goto fail_log;
 		}
-		if (!strstr(bpf_vlog, expected_err)) {
+		if (!expected_err || !strstr(bpf_vlog, expected_err)) {
 			printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
 			      expected_err, bpf_vlog);
 			goto fail_log;
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
new file mode 100755
index 000000000000..ba8ffcdaac30
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Create 3 namespaces with 3 veth peers, and
+# forward packets in-between using native XDP
+#
+#                      XDP_TX
+# NS1(veth11)        NS2(veth22)        NS3(veth33)
+#      |                  |                  |
+#      |                  |                  |
+#   (veth1,            (veth2,            (veth3,
+#   id:111)            id:122)            id:133)
+#     ^ |                ^ |                ^ |
+#     | |  XDP_REDIRECT  | |  XDP_REDIRECT  | |
+#     | ------------------ ------------------ |
+#     -----------------------------------------
+#                    XDP_REDIRECT
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME=xdp_veth
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+	set +e
+	ip link del veth1 2> /dev/null
+	ip link del veth2 2> /dev/null
+	ip link del veth3 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+	ip netns del ns3 2> /dev/null
+	rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+	echo "selftests: $TESTNAME [SKIP]"
+	_cleanup
+
+	exit $ksft_skip
+}
+
+cleanup()
+{
+	if [ "$?" = 0 ]; then
+		echo "selftests: $TESTNAME [PASS]"
+	else
+		echo "selftests: $TESTNAME [FAILED]"
+	fi
+	_cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+	echo "selftests: $TESTNAME [SKIP] Need root privileges"
+	exit $ksft_skip
+fi
+
+if ! ip link set dev lo xdp off > /dev/null 2>&1; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without the ip xdp support"
+	exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+	exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+	exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+
+ip link add veth1 index 111 type veth peer name veth11 netns ns1
+ip link add veth2 index 122 type veth peer name veth22 netns ns2
+ip link add veth3 index 133 type veth peer name veth33 netns ns3
+
+ip link set veth1 up
+ip link set veth2 up
+ip link set veth3 up
+
+ip -n ns1 addr add 10.1.1.11/24 dev veth11
+ip -n ns3 addr add 10.1.1.33/24 dev veth33
+
+ip -n ns1 link set dev veth11 up
+ip -n ns2 link set dev veth22 up
+ip -n ns3 link set dev veth33 up
+
+mkdir $BPF_DIR
+bpftool prog loadall \
+	xdp_redirect_map.o $BPF_DIR/progs type xdp \
+	pinmaps $BPF_DIR/maps
+bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0
+bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0
+bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0
+ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
+ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
+ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
+
+ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
+
+trap cleanup EXIT
+
+ip netns exec ns1 ping -c 1 -W 1 10.1.1.33
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh
new file mode 100755
index 000000000000..c2f0ddb45531
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdping.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# xdping tests
+#   Here we setup and teardown configuration required to run
+#   xdping, exercising its options.
+#
+#   Setup is similar to test_tunnel tests but without the tunnel.
+#
+# Topology:
+# ---------
+#     root namespace   |     tc_ns0 namespace
+#                      |
+#      ----------      |     ----------
+#      |  veth1  | --------- |  veth0  |
+#      ----------    peer    ----------
+#
+# Device Configuration
+# --------------------
+# Root namespace with BPF
+# Device names and addresses:
+#	veth1 IP: 10.1.1.200
+#	xdp added to veth1, xdpings originate from here.
+#
+# Namespace tc_ns0 with BPF
+# Device names and addresses:
+#       veth0 IPv4: 10.1.1.100
+#	For some tests xdping run in server mode here.
+#
+
+readonly TARGET_IP="10.1.1.100"
+readonly TARGET_NS="xdp_ns0"
+
+readonly LOCAL_IP="10.1.1.200"
+
+setup()
+{
+	ip netns add $TARGET_NS
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 netns $TARGET_NS
+	ip netns exec $TARGET_NS ip addr add ${TARGET_IP}/24 dev veth0
+	ip addr add ${LOCAL_IP}/24 dev veth1
+	ip netns exec $TARGET_NS ip link set veth0 up
+	ip link set veth1 up
+}
+
+cleanup()
+{
+	set +e
+	ip netns delete $TARGET_NS 2>/dev/null
+	ip link del veth1 2>/dev/null
+	if [[ $server_pid -ne 0 ]]; then
+		kill -TERM $server_pid
+	fi
+}
+
+test()
+{
+	client_args="$1"
+	server_args="$2"
+
+	echo "Test client args '$client_args'; server args '$server_args'"
+
+	server_pid=0
+	if [[ -n "$server_args" ]]; then
+		ip netns exec $TARGET_NS ./xdping $server_args &
+		server_pid=$!
+		sleep 10
+	fi
+	./xdping $client_args $TARGET_IP
+
+	if [[ $server_pid -ne 0 ]]; then
+		kill -TERM $server_pid
+		server_pid=0
+	fi
+
+	echo "Test client args '$client_args'; server args '$server_args': PASS"
+}
+
+set -e
+
+server_pid=0
+
+trap cleanup EXIT
+
+setup
+
+for server_args in "" "-I veth0 -s -S" ; do
+	# client in skb mode
+	client_args="-I veth1 -S"
+	test "$client_args" "$server_args"
+
+	# client with count of 10 RTT measurements.
+	client_args="-I veth1 -S -c 10"
+	test "$client_args" "$server_args"
+done
+
+echo "OK. All tests passed"
+exit 0
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 9a9fc6c9b70b..b47f205f0310 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -30,9 +30,7 @@ int load_kallsyms(void)
 	if (!f)
 		return -ENOENT;
 
-	while (!feof(f)) {
-		if (!fgets(buf, sizeof(buf), f))
-			break;
+	while (fgets(buf, sizeof(buf), f)) {
 		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
 			break;
 		if (!addr)
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9093a8f64dc6..2d752c4f8d9d 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -215,9 +215,11 @@
 	BPF_MOV64_IMM(BPF_REG_0, 3),
 	BPF_JMP_IMM(BPF_JA, 0, 0, -6),
 	},
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge from insn",
-	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+	.errstr_unpriv = "back-edge from insn",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
+	.retval = 1,
 },
 {
 	"calls: conditional call 4",
@@ -250,22 +252,24 @@
 	BPF_MOV64_IMM(BPF_REG_0, 3),
 	BPF_EXIT_INSN(),
 	},
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge from insn",
-	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.retval = 1,
 },
 {
 	"calls: conditional call 6",
 	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
 	BPF_EXIT_INSN(),
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, mark)),
 	BPF_EXIT_INSN(),
 	},
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge from insn",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.errstr = "infinite loop detected",
 	.result = REJECT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c
index 349c0862fb4c..4eb76ed739ce 100644
--- a/tools/testing/selftests/bpf/verifier/cfg.c
+++ b/tools/testing/selftests/bpf/verifier/cfg.c
@@ -41,7 +41,8 @@
 	BPF_JMP_IMM(BPF_JA, 0, 0, -1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "back-edge",
+	.errstr = "unreachable insn 1",
+	.errstr_unpriv = "back-edge",
 	.result = REJECT,
 },
 {
@@ -53,18 +54,20 @@
 	BPF_JMP_IMM(BPF_JA, 0, 0, -4),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "back-edge",
+	.errstr = "unreachable insn 4",
+	.errstr_unpriv = "back-edge",
 	.result = REJECT,
 },
 {
 	"conditional loop",
 	.insns = {
-	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "back-edge",
+	.errstr = "infinite loop detected",
+	.errstr_unpriv = "back-edge",
 	.result = REJECT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index d5c596fdc4b9..2c5fbe7bcd27 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -511,7 +511,8 @@
 		    offsetof(struct __sk_buff, data)),
 	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
 		    offsetof(struct __sk_buff, data_end)),
-	BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, mark)),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 1f39d845c64f..67ab12410050 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -29,9 +29,9 @@
 {
 	"helper access to variable memory: stack, bitwise AND, zero included",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -46,9 +46,9 @@
 {
 	"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
@@ -122,9 +122,9 @@
 {
 	"helper access to variable memory: stack, JMP, bounds + offset",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
@@ -143,9 +143,9 @@
 {
 	"helper access to variable memory: stack, JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
@@ -163,9 +163,9 @@
 {
 	"helper access to variable memory: stack, JMP, no max check",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_MOV64_IMM(BPF_REG_4, 0),
@@ -183,9 +183,9 @@
 {
 	"helper access to variable memory: stack, JMP, no min check",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
@@ -201,9 +201,9 @@
 {
 	"helper access to variable memory: stack, JMP (signed), no min check",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
@@ -244,6 +244,7 @@
 {
 	"helper access to variable memory: map, JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -251,7 +252,7 @@
 	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4),
@@ -262,7 +263,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_48b = { 3 },
+	.fixup_map_hash_48b = { 4 },
 	.errstr = "invalid access to map value, value_size=48 off=0 size=49",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -296,6 +297,7 @@
 {
 	"helper access to variable memory: map adjusted, JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -304,7 +306,7 @@
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
-	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4),
@@ -315,7 +317,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_48b = { 3 },
+	.fixup_map_hash_48b = { 4 },
 	.errstr = "R1 min value is outside of the array range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -337,8 +339,8 @@
 {
 	"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
 	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
 	BPF_MOV64_IMM(BPF_REG_1, 0),
-	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -562,6 +564,7 @@
 {
 	"helper access to variable memory: 8 bytes leak",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -572,7 +575,6 @@
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
-	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c
new file mode 100644
index 000000000000..5e980a5ab69d
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/loops1.c
@@ -0,0 +1,161 @@
+{
+	"bounded loop, count to 4",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop, count to 20",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 20, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded loop, count from positive unknown to 4",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop, count from totally unknown to 4",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded loop, count to 4 with equality",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, -2),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded loop, start in the middle",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_JMP_A(1),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+		BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "back-edge",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop containing a forward jump",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_0, 0),
+		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -3),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop that jumps out rather than in",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 10000, 2),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_JMP_A(-4),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"infinite loop after a conditional jump",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 5),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, 2),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_A(-2),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "program is too large",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded recursion",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "back-edge",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"infinite loop in two jumps",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_JMP_A(0),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "loop detected",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"infinite loop: three-jump trick",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, -11),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "loop detected",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
index bbdba990fefb..da7a4b37cb98 100644
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
@@ -29,21 +29,6 @@
 	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
 },
 {
-	"prevent map lookup in xskmap",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_xskmap = { 3 },
-	.result = REJECT,
-	.errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
-	.prog_type = BPF_PROG_TYPE_XDP,
-},
-{
 	"prevent map lookup in stack trace",
 	.insns = {
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index b31cd2cf50d0..9ed192e14f5f 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -498,3 +498,21 @@
 	.result = REJECT,
 	.errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
 },
+{
+	"bpf_map_lookup_elem(xskmap, &key); xs->queue_id",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_xskmap = { 3 },
+	.prog_type = BPF_PROG_TYPE_XDP,
+	.result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/wide_store.c b/tools/testing/selftests/bpf/verifier/wide_store.c
new file mode 100644
index 000000000000..8fe99602ded4
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/wide_store.c
@@ -0,0 +1,36 @@
+#define BPF_SOCK_ADDR(field, off, res, err) \
+{ \
+	"wide store to bpf_sock_addr." #field "[" #off "]", \
+	.insns = { \
+	BPF_MOV64_IMM(BPF_REG_0, 1), \
+	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, \
+		    offsetof(struct bpf_sock_addr, field[off])), \
+	BPF_EXIT_INSN(), \
+	}, \
+	.result = res, \
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
+	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
+	.errstr = err, \
+}
+
+/* user_ip6[0] is u64 aligned */
+BPF_SOCK_ADDR(user_ip6, 0, ACCEPT,
+	      NULL),
+BPF_SOCK_ADDR(user_ip6, 1, REJECT,
+	      "invalid bpf_context access off=12 size=8"),
+BPF_SOCK_ADDR(user_ip6, 2, ACCEPT,
+	      NULL),
+BPF_SOCK_ADDR(user_ip6, 3, REJECT,
+	      "invalid bpf_context access off=20 size=8"),
+
+/* msg_src_ip6[0] is _not_ u64 aligned */
+BPF_SOCK_ADDR(msg_src_ip6, 0, REJECT,
+	      "invalid bpf_context access off=44 size=8"),
+BPF_SOCK_ADDR(msg_src_ip6, 1, ACCEPT,
+	      NULL),
+BPF_SOCK_ADDR(msg_src_ip6, 2, REJECT,
+	      "invalid bpf_context access off=52 size=8"),
+BPF_SOCK_ADDR(msg_src_ip6, 3, REJECT,
+	      "invalid bpf_context access off=56 size=8"),
+
+#undef BPF_SOCK_ADDR
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
new file mode 100644
index 000000000000..d60a343b1371
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+
+#include "bpf/bpf.h"
+#include "bpf/libbpf.h"
+
+#include "xdping.h"
+
+static int ifindex;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+
+static void cleanup(int sig)
+{
+	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	if (sig)
+		exit(1);
+}
+
+static int get_stats(int fd, __u16 count, __u32 raddr)
+{
+	struct pinginfo pinginfo = { 0 };
+	char inaddrbuf[INET_ADDRSTRLEN];
+	struct in_addr inaddr;
+	__u16 i;
+
+	inaddr.s_addr = raddr;
+
+	printf("\nXDP RTT data:\n");
+
+	if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) {
+		perror("bpf_map_lookup elem: ");
+		return 1;
+	}
+
+	for (i = 0; i < count; i++) {
+		if (pinginfo.times[i] == 0)
+			break;
+
+		printf("64 bytes from %s: icmp_seq=%d ttl=64 time=%#.5f ms\n",
+		       inet_ntop(AF_INET, &inaddr, inaddrbuf,
+				 sizeof(inaddrbuf)),
+		       count + i + 1,
+		       (double)pinginfo.times[i]/1000000);
+	}
+
+	if (i < count) {
+		fprintf(stderr, "Expected %d samples, got %d.\n", count, i);
+		return 1;
+	}
+
+	bpf_map_delete_elem(fd, &raddr);
+
+	return 0;
+}
+
+static void show_usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] -I interface destination\n\n"
+		"OPTS:\n"
+		"    -c count		Stop after sending count requests\n"
+		"			(default %d, max %d)\n"
+		"    -I interface	interface name\n"
+		"    -N			Run in driver mode\n"
+		"    -s			Server mode\n"
+		"    -S			Run in skb mode\n",
+		prog, XDPING_DEFAULT_COUNT, XDPING_MAX_COUNT);
+}
+
+int main(int argc, char **argv)
+{
+	__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
+	struct addrinfo *a, hints = { .ai_family = AF_INET };
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	__u16 count = XDPING_DEFAULT_COUNT;
+	struct pinginfo pinginfo = { 0 };
+	const char *optstr = "c:I:NsS";
+	struct bpf_program *main_prog;
+	int prog_fd = -1, map_fd = -1;
+	struct sockaddr_in rin;
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	char *ifname = NULL;
+	char filename[256];
+	int opt, ret = 1;
+	__u32 raddr = 0;
+	int server = 0;
+	char cmd[256];
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'c':
+			count = atoi(optarg);
+			if (count < 1 || count > XDPING_MAX_COUNT) {
+				fprintf(stderr,
+					"min count is 1, max count is %d\n",
+					XDPING_MAX_COUNT);
+				return 1;
+			}
+			break;
+		case 'I':
+			ifname = optarg;
+			ifindex = if_nametoindex(ifname);
+			if (!ifindex) {
+				fprintf(stderr, "Could not get interface %s\n",
+					ifname);
+				return 1;
+			}
+			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
+		case 's':
+			/* use server program */
+			server = 1;
+			break;
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		default:
+			show_usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (!ifname) {
+		show_usage(basename(argv[0]));
+		return 1;
+	}
+	if (!server && optind == argc) {
+		show_usage(basename(argv[0]));
+		return 1;
+	}
+
+	if ((xdp_flags & mode_flags) == mode_flags) {
+		fprintf(stderr, "-N or -S can be specified, not both.\n");
+		show_usage(basename(argv[0]));
+		return 1;
+	}
+
+	if (!server) {
+		/* Only supports IPv4; see hints initiailization above. */
+		if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
+			fprintf(stderr, "Could not resolve %s\n", argv[optind]);
+			return 1;
+		}
+		memcpy(&rin, a->ai_addr, sizeof(rin));
+		raddr = rin.sin_addr.s_addr;
+		freeaddrinfo(a);
+	}
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
+		fprintf(stderr, "load of %s failed\n", filename);
+		return 1;
+	}
+
+	main_prog = bpf_object__find_program_by_title(obj,
+						      server ? "xdpserver" :
+							       "xdpclient");
+	if (main_prog)
+		prog_fd = bpf_program__fd(main_prog);
+	if (!main_prog || prog_fd < 0) {
+		fprintf(stderr, "could not find xdping program");
+		return 1;
+	}
+
+	map = bpf_map__next(NULL, obj);
+	if (map)
+		map_fd = bpf_map__fd(map);
+	if (!map || map_fd < 0) {
+		fprintf(stderr, "Could not find ping map");
+		goto done;
+	}
+
+	signal(SIGINT, cleanup);
+	signal(SIGTERM, cleanup);
+
+	printf("Setting up XDP for %s, please wait...\n", ifname);
+
+	printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
+
+	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+		fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
+		goto done;
+	}
+
+	if (server) {
+		close(prog_fd);
+		close(map_fd);
+		printf("Running server on %s; press Ctrl+C to exit...\n",
+		       ifname);
+		do { } while (1);
+	}
+
+	/* Start xdping-ing from last regular ping reply, e.g. for a count
+	 * of 10 ICMP requests, we start xdping-ing using reply with seq number
+	 * 10.  The reason the last "real" ping RTT is much higher is that
+	 * the ping program sees the ICMP reply associated with the last
+	 * XDP-generated packet, so ping doesn't get a reply until XDP is done.
+	 */
+	pinginfo.seq = htons(count);
+	pinginfo.count = count;
+
+	if (bpf_map_update_elem(map_fd, &raddr, &pinginfo, BPF_ANY)) {
+		fprintf(stderr, "could not communicate with BPF map: %s\n",
+			strerror(errno));
+		cleanup(0);
+		goto done;
+	}
+
+	/* We need to wait for XDP setup to complete. */
+	sleep(10);
+
+	snprintf(cmd, sizeof(cmd), "ping -c %d -I %s %s",
+		 count, ifname, argv[optind]);
+
+	printf("\nNormal ping RTT data\n");
+	printf("[Ignore final RTT; it is distorted by XDP using the reply]\n");
+
+	ret = system(cmd);
+
+	if (!ret)
+		ret = get_stats(map_fd, count, raddr);
+
+	cleanup(0);
+
+done:
+	if (prog_fd > 0)
+		close(prog_fd);
+	if (map_fd > 0)
+		close(map_fd);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/xdping.h b/tools/testing/selftests/bpf/xdping.h
new file mode 100644
index 000000000000..afc578df77be
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdping.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
+
+#define	XDPING_MAX_COUNT	10
+#define	XDPING_DEFAULT_COUNT	4
+
+struct pinginfo {
+	__u64	start;
+	__be16	seq;
+	__u16	count;
+	__u32	pad;
+	__u64	times[XDPING_MAX_COUNT];
+};
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
new file mode 100755
index 000000000000..e99ae500f387
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
@@ -0,0 +1,349 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test unicast FIB offload indication.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv6_route_add
+	ipv6_route_replace
+	ipv6_route_nexthop_group_share
+	ipv6_route_rate
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+tor1_create()
+{
+	simple_if_init $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+tor1_destroy()
+{
+	simple_if_fini $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+tor2_create()
+{
+	simple_if_init $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+tor2_destroy()
+{
+	simple_if_fini $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+spine_create()
+{
+	ip link set dev $spine_p1 up
+	ip link set dev $spine_p2 up
+
+	__addr_add_del $spine_p1 add 2001:db8:1::1/64
+	__addr_add_del $spine_p2 add 2001:db8:2::1/64
+}
+
+spine_destroy()
+{
+	__addr_add_del $spine_p2 del 2001:db8:2::1/64
+	__addr_add_del $spine_p1 del 2001:db8:1::1/64
+
+	ip link set dev $spine_p2 down
+	ip link set dev $spine_p1 down
+}
+
+ipv6_offload_check()
+{
+	local pfx="$1"; shift
+	local expected_num=$1; shift
+	local num
+
+	# Try to avoid races with route offload
+	sleep .1
+
+	num=$(ip -6 route show match ${pfx} | grep "offload" | wc -l)
+
+	if [ $num -eq $expected_num ]; then
+		return 0
+	fi
+
+	return 1
+}
+
+ipv6_route_add_prefix()
+{
+	RET=0
+
+	# Add a prefix route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "prefix route not offloaded"
+
+	# Append an identical prefix route with an higher metric and check that
+	# offload indication did not change.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend an identical prefix route with lower metric and check that
+	# it is offloaded and the others are not.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Delete the routes and add the same route with a different nexthop
+	# device. Check that it is offloaded.
+	ip -6 route flush 2001:db8:3::/64 dev $spine_p1
+	ip -6 route add 2001:db8:3::/64 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1
+
+	log_test "IPv6 prefix route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_add_mpath()
+{
+	RET=0
+
+	# Add a multipath route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded when should"
+
+	# Append another nexthop and check that it is offloaded as well.
+	ip -6 route append 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 3
+	check_err $? "appended nexthop not offloaded when should"
+
+	# Mimic route replace by removing the route and adding it back with
+	# only two nexthops.
+	ip -6 route del 2001:db8:3::/64
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after delete & add"
+
+	# Append a nexthop with an higher metric and check that the offload
+	# indication did not change.
+	ip -6 route append 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend a nexthop with a lower metric and check that it is offloaded
+	# and the others are not.
+	ip -6 route append 2001:db8:3::/64 metric 10 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	log_test "IPv6 multipath route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_add()
+{
+	ipv6_route_add_prefix
+	ipv6_route_add_mpath
+}
+
+ipv6_route_replace()
+{
+	RET=0
+
+	# Replace prefix route with prefix route.
+	ip -6 route add 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded when should"
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded after replace"
+
+	# Replace prefix route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replace"
+
+	# Replace multipath route with prefix route. A prefix route cannot
+	# replace a multipath route, so it is appended.
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100 dev $spine_p1" 0
+	check_err $? "prefix route offloaded after 'replacing' multipath route"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after being 'replaced' by prefix route"
+
+	# Replace multipath route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replacing multipath route"
+
+	# Replace a non-existing multipath route with a multipath route and
+	# check that it is appended and not offloaded.
+	ip -6 route replace 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after non-existing route was 'replaced'"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "multipath route offloaded after 'replacing' non-existing route"
+
+	log_test "IPv6 route replace"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_nexthop_group_share()
+{
+	RET=0
+
+	# The driver consolidates identical nexthop groups in order to reduce
+	# the resource usage in its adjacency table. Check that the deletion
+	# of one multipath route using the group does not affect the other.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ip -6 route add 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ip -6 route del 2001:db8:3::/64
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group"
+
+	# Check that after unsharing a nexthop group the routes are still
+	# marked as offloaded.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ip -6 route del 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1
+	ipv6_offload_check "2001:db8:4::/64" 1
+	check_err $? "singlepath route not offloaded after unsharing the nexthop group"
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded after unsharing the nexthop group"
+
+	log_test "IPv6 nexthop group sharing"
+
+	ip -6 route flush 2001:db8:3::/64
+	ip -6 route flush 2001:db8:4::/64
+}
+
+ipv6_route_rate()
+{
+	local batch_dir=$(mktemp -d)
+	local num_rts=$((40 * 1024))
+	local num_nhs=16
+	local total
+	local start
+	local diff
+	local end
+	local nhs
+	local i
+
+	RET=0
+
+	# Prepare 40K /64 multipath routes with 16 nexthops each and check how
+	# long it takes to add them. A limit of 60 seconds is set. It is much
+	# higher than insertion should take and meant to flag a serious
+	# regression.
+	total=$((nums_nhs * num_rts))
+
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1
+		nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1"
+	done
+
+	for i in $(seq 1 $num_rts); do
+		echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/add.batch
+		echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/del.batch
+	done
+
+	start=$(date +%s.%N)
+
+	ip -batch $batch_dir/add.batch
+	count=$(ip -6 route show | grep offload | wc -l)
+	while [ $count -lt $total ]; do
+		sleep .01
+		count=$(ip -6 route show | grep offload | wc -l)
+	done
+
+	end=$(date +%s.%N)
+
+	diff=$(echo "$end - $start" | bc -l)
+	test "$(echo "$diff > 60" | bc -l)" -eq 0
+	check_err $? "route insertion took too long"
+	log_info "inserted $num_rts routes in $diff seconds"
+
+	log_test "IPv6 routes insertion rate"
+
+	ip -batch $batch_dir/del.batch
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1
+	done
+	rm -rf $batch_dir
+}
+
+setup_prepare()
+{
+	spine_p1=${NETIFS[p1]}
+	tor1_p1=${NETIFS[p2]}
+
+	spine_p2=${NETIFS[p3]}
+	tor2_p1=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	tor1_create
+	tor2_create
+	spine_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	spine_destroy
+	tor2_destroy
+	tor1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
new file mode 100755
index 000000000000..9d8baf5d14b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="fw_flash_test"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+BUS_ADDR=10
+PORT_COUNT=4
+DEV_NAME=netdevsim$BUS_ADDR
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
+DL_HANDLE=netdevsim/$DEV_NAME
+
+fw_flash_test()
+{
+	RET=0
+
+	devlink dev flash $DL_HANDLE file dummy
+	check_err $? "Failed to flash with status updates on"
+
+	echo "n"> $DEBUGFS_DIR/fw_update_status
+	check_err $? "Failed to disable status updates"
+
+	devlink dev flash $DL_HANDLE file dummy
+	check_err $? "Failed to flash with status updates off"
+
+	log_test "fw flash test"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim
+	echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+}
+
+cleanup()
+{
+	pre_cleanup
+	echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device
+	modprobe -r netdevsim
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6f81130605d7..4ce0bc1612f5 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -17,3 +17,7 @@ tcp_inq
 tls
 txring_overwrite
 ip_defrag
+so_txtime
+flowlabel
+flowlabel_mgr
+tcp_fastopen_backup_key
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 1e6d14d2825c..1b24e36b4047 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,16 +5,19 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
-	      rtnetlink.sh xfrm_policy.sh
+	      rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh
+TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += tcp_fastopen_backup_key
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3dea2cba2325..b8503a8119b0 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -25,4 +25,7 @@ CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_ETF=m
+CONFIG_TEST_BLACKHOLE_DEV=m
 CONFIG_KALLSYMS=y
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 864f865eee55..c287b90b8af8 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -4,6 +4,7 @@
 # IPv4 and IPv6 onlink tests
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+VERBOSE=0
 
 # Network interfaces
 # - odd in current namespace; even in peer ns
@@ -91,10 +92,10 @@ log_test()
 
 	if [ ${rc} -eq ${expected} ]; then
 		nsuccess=$((nsuccess+1))
-		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+		printf "    TEST: %-50s  [ OK ]\n" "${msg}"
 	else
 		nfail=$((nfail+1))
-		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		printf "    TEST: %-50s  [FAIL]\n" "${msg}"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 			echo
 			echo "hit enter to continue, 'q' to quit"
@@ -121,9 +122,23 @@ log_subsection()
 
 run_cmd()
 {
-	echo
-	echo "COMMAND: $*"
-	eval $*
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
 }
 
 get_linklocal()
@@ -451,11 +466,34 @@ run_onlink_tests()
 }
 
 ################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -p          Pause on fail
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
 # main
 
 nsuccess=0
 nfail=0
 
+while getopts :t:pPhv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
 cleanup
 setup
 run_onlink_tests
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
new file mode 100755
index 000000000000..e6828732843e
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Validate cached routes in fib{6}_nh that is used by multiple prefixes.
+# Validate a different # exception is generated in h0 for each remote host.
+#
+#               h1
+#            /
+#    h0 - r1 -  h2
+#            \
+#               h3
+#
+# routing in h0 to hN is done with nexthop objects.
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "$out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+################################################################################
+# config
+
+create_ns()
+{
+	local ns=${1}
+
+	ip netns del ${ns} 2>/dev/null
+
+	ip netns add ${ns}
+	ip -netns ${ns} addr add 127.0.0.1/8 dev lo
+	ip -netns ${ns} link set lo up
+
+	ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+	case ${ns} in
+	h*)
+		ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+		;;
+	r*)
+		ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+		ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		;;
+	esac
+}
+
+setup()
+{
+	local ns
+	local i
+
+	#set -e
+
+	for ns in h0 r1 h1 h2 h3
+	do
+		create_ns ${ns}
+	done
+
+	#
+	# create interconnects
+	#
+
+	for i in 0 1 2 3
+	do
+		ip -netns h${i} li add eth0 type veth peer name r1h${i}
+		ip -netns h${i} li set eth0 up
+		ip -netns h${i} li set r1h${i} netns r1 name eth${i} up
+
+		ip -netns h${i}    addr add dev eth0 172.16.10${i}.1/24
+		ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64
+		ip -netns r1    addr add dev eth${i} 172.16.10${i}.254/24
+		ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64
+	done
+
+	ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0
+	ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0
+
+	# routing from h0 to h1-h3 and back
+	for i in 1 2 3
+	do
+		ip -netns h0    ro add 172.16.10${i}.0/24 nhid 4
+		ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254
+
+		ip -netns h0    -6 ro add 2001:db8:10${i}::/64 nhid 6
+		ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
+	done
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "host 1 config"
+		ip -netns h0 li sh
+		ip -netns h0 ro sh
+		ip -netns h0 -6 ro sh
+	fi
+
+	#set +e
+}
+
+cleanup()
+{
+	for n in h1 r1 h2 h3 h4
+	do
+		ip netns del ${n} 2>/dev/null
+	done
+}
+
+change_mtu()
+{
+	local hostid=$1
+	local mtu=$2
+
+	run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu}
+	run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu}
+}
+
+################################################################################
+# validate exceptions
+
+validate_v4_exception()
+{
+	local i=$1
+	local mtu=$2
+	local ping_sz=$3
+	local dst="172.16.10${i}.1"
+	local h0=172.16.100.1
+	local r1=172.16.100.254
+	local rc
+
+	if [ ${ping_sz} != "0" ]; then
+		run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst}
+	fi
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "Route get"
+		ip -netns h0 ro get ${dst}
+		echo "Searching for:"
+		echo "    cache .* mtu ${mtu}"
+		echo
+	fi
+
+	ip -netns h0 ro get ${dst} | \
+	grep -q "cache .* mtu ${mtu}"
+	rc=$?
+
+	log_test $rc 0 "IPv4: host 0 to host ${i}, mtu ${mtu}"
+}
+
+validate_v6_exception()
+{
+	local i=$1
+	local mtu=$2
+	local ping_sz=$3
+	local dst="2001:db8:10${i}::1"
+	local h0=2001:db8:100::1
+	local r1=2001:db8:100::64
+	local rc
+
+	if [ ${ping_sz} != "0" ]; then
+		run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst}
+	fi
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "Route get"
+		ip -netns h0 -6 ro get ${dst}
+		echo "Searching for:"
+		echo "    ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+		echo
+	fi
+
+	ip -netns h0 -6 ro get ${dst} | \
+	grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+	rc=$?
+
+	log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}"
+}
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=1;;
+	esac
+done
+
+cleanup
+setup
+sleep 2
+
+cpus=$(cat  /sys/devices/system/cpu/online)
+cpus="$(seq ${cpus/-/ })"
+ret=0
+for i in 1 2 3
+do
+	# generate a cached route per-cpu
+	for c in ${cpus}; do
+		run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
+		[ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
+
+		run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1
+		[ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
+
+		[ $ret -ne 0 ] && break
+	done
+	[ $ret -ne 0 ] && break
+done
+
+if [ $ret -eq 0 ]; then
+	# generate different exceptions in h0 for h1, h2 and h3
+	change_mtu 1 1300
+	validate_v4_exception 1 1300 1350
+	validate_v6_exception 1 1300 1350
+	echo
+
+	change_mtu 2 1350
+	validate_v4_exception 2 1350 1400
+	validate_v6_exception 2 1350 1400
+	echo
+
+	change_mtu 3 1400
+	validate_v4_exception 3 1400 1450
+	validate_v6_exception 3 1400 1450
+	echo
+
+	validate_v4_exception 1 1300 0
+	validate_v6_exception 1 1300 0
+	echo
+
+	validate_v4_exception 2 1350 0
+	validate_v6_exception 2 1350 0
+	echo
+
+	validate_v4_exception 3 1400 0
+	validate_v6_exception 3 1400 0
+
+	# targeted deletes to trigger cleanup paths in kernel
+	ip -netns h0 ro del 172.16.102.0/24 nhid 4
+	ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6
+
+	ip -netns h0 nexthop del id 4
+	ip -netns h0 nexthop del id 6
+fi
+
+cleanup
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
new file mode 100755
index 000000000000..c5c93d5fb3ad
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -0,0 +1,1026 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: me               | ns: peer              | ns: remote
+#   2001:db8:91::1     |       2001:db8:91::2  |
+#   172.16.1.1         |       172.16.1.2      |
+#            veth1 <---|---> veth2             |
+#                      |              veth5 <--|--> veth6  172.16.101.1
+#            veth3 <---|---> veth4             |           2001:db8:101::1
+#   172.16.2.1         |       172.16.2.2      |
+#   2001:db8:92::1     |       2001:db8:92::2  |
+#
+# This test is for checking IPv4 and IPv6 FIB behavior with nexthop
+# objects. Device reference counts and network namespace cleanup tested
+# by use of network namespace for peer.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime"
+
+ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+TESTS="${ALL_TESTS}"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+nsid=100
+
+################################################################################
+# utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "$VERBOSE" = "1" ]; then
+			echo "    rc=$rc, expected $expected"
+		fi
+
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local ns
+	local addr
+
+	[ -n "$2" ] && ns="-netns $2"
+	addr=$(ip $ns -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+create_ns()
+{
+	local n=${1}
+
+	ip netns del ${n} 2>/dev/null
+
+	set -e
+	ip netns add ${n}
+	ip netns set ${n} $((nsid++))
+	ip -netns ${n} addr add 127.0.0.1/8 dev lo
+	ip -netns ${n} link set lo up
+
+	ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1
+	ip netns exec ${n} sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.default.forwarding=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.default.accept_dad=0
+
+	set +e
+}
+
+setup()
+{
+	cleanup
+
+	create_ns me
+	create_ns peer
+	create_ns remote
+
+	IP="ip -netns me"
+	set -e
+	$IP li add veth1 type veth peer name veth2
+	$IP li set veth1 up
+	$IP addr add 172.16.1.1/24 dev veth1
+	$IP -6 addr add 2001:db8:91::1/64 dev veth1
+
+	$IP li add veth3 type veth peer name veth4
+	$IP li set veth3 up
+	$IP addr add 172.16.2.1/24 dev veth3
+	$IP -6 addr add 2001:db8:92::1/64 dev veth3
+
+	$IP li set veth2 netns peer up
+	ip -netns peer addr add 172.16.1.2/24 dev veth2
+	ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2
+
+	$IP li set veth4 netns peer up
+	ip -netns peer addr add 172.16.2.2/24 dev veth4
+	ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4
+
+	ip -netns remote li add veth5 type veth peer name veth6
+	ip -netns remote li set veth5 up
+	ip -netns remote addr add dev veth5 172.16.101.1/24
+	ip -netns remote addr add dev veth5 2001:db8:101::1/64
+	ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
+	ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
+
+	ip -netns remote li set veth6 netns peer up
+	ip -netns peer addr add dev veth6 172.16.101.2/24
+	ip -netns peer addr add dev veth6 2001:db8:101::2/64
+	set +e
+}
+
+cleanup()
+{
+	local ns
+
+	for ns in me peer remote; do
+		ip netns del ${ns} 2>/dev/null
+	done
+}
+
+check_output()
+{
+	local out="$1"
+	local expected="$2"
+	local rc=0
+
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+check_nexthop()
+{
+	local nharg="$1"
+	local expected="$2"
+	local out
+
+	out=$($IP nexthop ls ${nharg} 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
+check_route()
+{
+	local pfx="$1"
+	local expected="$2"
+	local out
+
+	out=$($IP route ls match ${pfx} 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
+check_route6()
+{
+	local pfx="$1"
+	local expected="$2"
+	local out
+
+	out=$($IP -6 route ls match ${pfx} 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
+################################################################################
+# basic operations (add, delete, replace) on nexthops and nexthop groups
+#
+# IPv6
+
+ipv6_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6"
+	echo "----------------------"
+
+	run_cmd "$IP nexthop add id 52 via 2001:db8:91::2 dev veth1"
+	rc=$?
+	log_test $rc 0 "Create nexthop with id, gw, dev"
+	if [ $rc -ne 0 ]; then
+		echo "Basic IPv6 create fails; can not continue"
+		return 1
+	fi
+
+	run_cmd "$IP nexthop get id 52"
+	log_test $? 0 "Get nexthop by id"
+	check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1"
+
+	run_cmd "$IP nexthop del id 52"
+	log_test $? 0 "Delete nexthop by id"
+	check_nexthop "id 52" ""
+
+	#
+	# gw, device spec
+	#
+	# gw validation, no device - fails since dev required
+	run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
+	log_test $? 2 "Create nexthop - gw only"
+
+	# gw is not reachable throught given dev
+	run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
+	log_test $? 2 "Create nexthop - invalid gw+dev combination"
+
+	# onlink arg overrides gw+dev lookup
+	run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1 onlink"
+	log_test $? 0 "Create nexthop - gw+dev and onlink"
+
+	# admin down should delete nexthops
+	set -e
+	run_cmd "$IP -6 nexthop add id 55 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 56 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 57 via 2001:db8:91::5 dev veth1"
+	run_cmd "$IP li set dev veth1 down"
+	set +e
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops removed on admin down"
+}
+
+ipv6_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6 groups functional"
+	echo "----------------------"
+
+	# basic functionality: create a nexthop group, default weight
+	run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 61"
+	log_test $? 0 "Create nexthop group with single nexthop"
+
+	# get nexthop group
+	run_cmd "$IP nexthop get id 101"
+	log_test $? 0 "Get nexthop group by id"
+	check_nexthop "id 101" "id 101 group 61"
+
+	# delete nexthop group
+	run_cmd "$IP nexthop del id 101"
+	log_test $? 0 "Delete nexthop group by id"
+	check_nexthop "id 101" ""
+
+	$IP nexthop flush >/dev/null 2>&1
+	check_nexthop "id 101" ""
+
+	#
+	# create group with multiple nexthops - mix of gw and dev only
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 65 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62/63/64/65"
+	log_test $? 0 "Nexthop group with multiple nexthops"
+	check_nexthop "id 102" "id 102 group 62/63/64/65"
+
+	# Delete nexthop in a group and group is updated
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" "id 102 group 62/64/65"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+
+	# create group with multiple weighted nexthops
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 103 group 62/63,2/64,3/65,4"
+	log_test $? 0 "Nexthop group with weighted nexthops"
+	check_nexthop "id 103" "id 103 group 62/63,2/64,3/65,4"
+
+	# Delete nexthop in a weighted group and group is updated
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 103" "id 103 group 62/64,3/65,4"
+	log_test $? 0 "Weighted nexthop group updated when entry is deleted"
+
+	# admin down - nexthop is removed from group
+	run_cmd "$IP li set dev veth1 down"
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops in groups removed on admin down"
+
+	# expect groups to have been deleted as well
+	check_nexthop "" ""
+
+	run_cmd "$IP li set dev veth1 up"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	# group with nexthops using different devices
+	set -e
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+
+	run_cmd "$IP nexthop add id 72 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 73 via 2001:db8:92::3 dev veth3"
+	run_cmd "$IP nexthop add id 74 via 2001:db8:92::4 dev veth3"
+	run_cmd "$IP nexthop add id 75 via 2001:db8:92::5 dev veth3"
+	set +e
+
+	# multiple groups with same nexthop
+	run_cmd "$IP nexthop add id 104 group 62"
+	run_cmd "$IP nexthop add id 105 group 62"
+	check_nexthop "group" "id 104 group 62 id 105 group 62"
+	log_test $? 0 "Multiple groups with same nexthop"
+
+	run_cmd "$IP nexthop flush groups"
+	[ $? -ne 0 ] && return 1
+
+	# on admin down of veth1, it should be removed from the group
+	run_cmd "$IP nexthop add id 105 group 62/63/72/73/64"
+	run_cmd "$IP li set veth1 down"
+	check_nexthop "id 105" "id 105 group 72/73"
+	log_test $? 0 "Nexthops in group removed on admin down - mixed group"
+
+	run_cmd "$IP nexthop add id 106 group 105/74"
+	log_test $? 2 "Nexthop group can not have a group as an entry"
+
+	# a group can have a blackhole entry only if it is the only
+	# nexthop in the group. Needed for atomic replace with an
+	# actual nexthop group
+	run_cmd "$IP -6 nexthop add id 31 blackhole"
+	run_cmd "$IP nexthop add id 107 group 31"
+	log_test $? 0 "Nexthop group with a blackhole entry"
+
+	run_cmd "$IP nexthop add id 108 group 31/24"
+	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+}
+
+ipv6_fcnal_runtime()
+{
+	local rc
+
+	echo
+	echo "IPv6 functional runtime"
+	echo "-----------------------"
+
+	sleep 5
+
+	#
+	# IPv6 - the basics
+	#
+	run_cmd "$IP nexthop add id 81 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+	log_test $? 0 "Route add"
+
+	run_cmd "$IP ro delete 2001:db8:101::1/128 nhid 81"
+	log_test $? 0 "Route delete"
+
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 0 "Ping with nexthop"
+
+	run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 122 group 81/82"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 0 "Ping - multipath"
+
+	#
+	# IPv6 with blackhole nexthops
+	#
+	run_cmd "$IP -6 nexthop add id 83 blackhole"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 2 "Ping - blackhole"
+
+	run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 0 "Ping - blackhole replaced with gateway"
+
+	run_cmd "$IP -6 nexthop replace id 83 blackhole"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 2 "Ping - gateway replaced by blackhole"
+
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	if [ $? -eq 0 ]; then
+		run_cmd "$IP nexthop replace id 122 group 83"
+		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		log_test $? 2 "Ping - group with blackhole"
+
+		run_cmd "$IP nexthop replace id 122 group 81/82"
+		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		log_test $? 0 "Ping - group blackhole replaced with gateways"
+	else
+		log_test 2 0 "Ping - multipath failed"
+	fi
+
+	#
+	# device only and gw + dev only mix
+	#
+	run_cmd "$IP -6 nexthop add id 85 dev veth1"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
+	log_test $? 0 "IPv6 route with device only nexthop"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1"
+
+	run_cmd "$IP nexthop add id 123 group 81/85"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
+	log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 nexthop via 2001:db8:91::2 dev veth1 nexthop dev veth1"
+
+	#
+	# IPv6 route with v4 nexthop - not allowed
+	#
+	run_cmd "$IP ro delete 2001:db8:101::1/128"
+	run_cmd "$IP nexthop add id 84 via 172.16.1.1 dev veth1"
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 84"
+	log_test $? 2 "IPv6 route can not have a v4 gateway"
+
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 81"
+	run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
+	log_test $? 2 "Nexthop replace - v6 route, v4 nexthop"
+
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+	run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
+	log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# weird IPv6 cases
+	#
+	run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+
+	# TO-DO:
+	# existing route with old nexthop; append route with new nexthop
+	# existing route with old nexthop; replace route with new
+	# existing route with new nexthop; replace route with old
+	# route with src address and using nexthop - not allowed
+}
+
+ipv4_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 functional"
+	echo "----------------------"
+
+	#
+	# basic IPv4 ops - add, get, delete
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	rc=$?
+	log_test $rc 0 "Create nexthop with id, gw, dev"
+	if [ $rc -ne 0 ]; then
+		echo "Basic IPv4 create fails; can not continue"
+		return 1
+	fi
+
+	run_cmd "$IP nexthop get id 12"
+	log_test $? 0 "Get nexthop by id"
+	check_nexthop "id 12" "id 12 via 172.16.1.2 src 172.16.1.1 dev veth1 scope link"
+
+	run_cmd "$IP nexthop del id 12"
+	log_test $? 0 "Delete nexthop by id"
+	check_nexthop "id 52" ""
+
+	#
+	# gw, device spec
+	#
+	# gw validation, no device - fails since dev is required
+	run_cmd "$IP nexthop add id 12 via 172.16.2.3"
+	log_test $? 2 "Create nexthop - gw only"
+
+	# gw not reachable through given dev
+	run_cmd "$IP nexthop add id 13 via 172.16.3.2 dev veth1"
+	log_test $? 2 "Create nexthop - invalid gw+dev combination"
+
+	# onlink flag overrides gw+dev lookup
+	run_cmd "$IP nexthop add id 13 via 172.16.3.2 dev veth1 onlink"
+	log_test $? 0 "Create nexthop - gw+dev and onlink"
+
+	# admin down should delete nexthops
+	set -e
+	run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 16 via 172.16.1.4 dev veth1"
+	run_cmd "$IP nexthop add id 17 via 172.16.1.5 dev veth1"
+	run_cmd "$IP li set dev veth1 down"
+	set +e
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops removed on admin down"
+}
+
+ipv4_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 groups functional"
+	echo "----------------------"
+
+	# basic functionality: create a nexthop group, default weight
+	run_cmd "$IP nexthop add id 11 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 11"
+	log_test $? 0 "Create nexthop group with single nexthop"
+
+	# get nexthop group
+	run_cmd "$IP nexthop get id 101"
+	log_test $? 0 "Get nexthop group by id"
+	check_nexthop "id 101" "id 101 group 11"
+
+	# delete nexthop group
+	run_cmd "$IP nexthop del id 101"
+	log_test $? 0 "Delete nexthop group by id"
+	check_nexthop "id 101" ""
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# create group with multiple nexthops
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 14 via 172.16.1.4 dev veth1"
+	run_cmd "$IP nexthop add id 15 via 172.16.1.5 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12/13/14/15"
+	log_test $? 0 "Nexthop group with multiple nexthops"
+	check_nexthop "id 102" "id 102 group 12/13/14/15"
+
+	# Delete nexthop in a group and group is updated
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" "id 102 group 12/14/15"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+
+	# create group with multiple weighted nexthops
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 103 group 12/13,2/14,3/15,4"
+	log_test $? 0 "Nexthop group with weighted nexthops"
+	check_nexthop "id 103" "id 103 group 12/13,2/14,3/15,4"
+
+	# Delete nexthop in a weighted group and group is updated
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 103" "id 103 group 12/14,3/15,4"
+	log_test $? 0 "Weighted nexthop group updated when entry is deleted"
+
+	# admin down - nexthop is removed from group
+	run_cmd "$IP li set dev veth1 down"
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops in groups removed on admin down"
+
+	# expect groups to have been deleted as well
+	check_nexthop "" ""
+
+	run_cmd "$IP li set dev veth1 up"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	# group with nexthops using different devices
+	set -e
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 14 via 172.16.1.4 dev veth1"
+	run_cmd "$IP nexthop add id 15 via 172.16.1.5 dev veth1"
+
+	run_cmd "$IP nexthop add id 22 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 23 via 172.16.2.3 dev veth3"
+	run_cmd "$IP nexthop add id 24 via 172.16.2.4 dev veth3"
+	run_cmd "$IP nexthop add id 25 via 172.16.2.5 dev veth3"
+	set +e
+
+	# multiple groups with same nexthop
+	run_cmd "$IP nexthop add id 104 group 12"
+	run_cmd "$IP nexthop add id 105 group 12"
+	check_nexthop "group" "id 104 group 12 id 105 group 12"
+	log_test $? 0 "Multiple groups with same nexthop"
+
+	run_cmd "$IP nexthop flush groups"
+	[ $? -ne 0 ] && return 1
+
+	# on admin down of veth1, it should be removed from the group
+	run_cmd "$IP nexthop add id 105 group 12/13/22/23/14"
+	run_cmd "$IP li set veth1 down"
+	check_nexthop "id 105" "id 105 group 22/23"
+	log_test $? 0 "Nexthops in group removed on admin down - mixed group"
+
+	run_cmd "$IP nexthop add id 106 group 105/24"
+	log_test $? 2 "Nexthop group can not have a group as an entry"
+
+	# a group can have a blackhole entry only if it is the only
+	# nexthop in the group. Needed for atomic replace with an
+	# actual nexthop group
+	run_cmd "$IP nexthop add id 31 blackhole"
+	run_cmd "$IP nexthop add id 107 group 31"
+	log_test $? 0 "Nexthop group with a blackhole entry"
+
+	run_cmd "$IP nexthop add id 108 group 31/24"
+	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+}
+
+ipv4_withv6_fcnal()
+{
+	local lladdr
+
+	set -e
+	lladdr=$(get_linklocal veth2 peer)
+	run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1"
+	set +e
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 11"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+	check_route "172.16.101.1" "172.16.101.1 nhid 11 via ${lladdr} dev veth1"
+
+	set -e
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 11/12"
+	set +e
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+	log_test $? 0 "IPv4 route with IPv6 gateway"
+	check_route "172.16.101.1" "172.16.101.1 via ${lladdr} dev veth1"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1"
+	log_test $? 2 "IPv4 route with invalid IPv6 gateway"
+}
+
+ipv4_fcnal_runtime()
+{
+	local lladdr
+	local rc
+
+	echo
+	echo "IPv4 functional runtime"
+	echo "-----------------------"
+
+	run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1"
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 21"
+	log_test $? 0 "Route add"
+	check_route "172.16.101.1" "172.16.101.1 nhid 21 via 172.16.1.2 dev veth1"
+
+	run_cmd "$IP ro delete 172.16.101.1/32 nhid 21"
+	log_test $? 0 "Route delete"
+
+	#
+	# scope mismatch
+	#
+	run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1"
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 22 scope host"
+	log_test $? 2 "Route add - scope conflict with nexthop"
+
+	run_cmd "$IP nexthop replace id 22 dev veth3"
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 22 scope host"
+	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
+	log_test $? 2 "Nexthop replace with invalid scope for existing route"
+
+	#
+	# add route with nexthop and check traffic
+	#
+	run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Basic ping"
+
+	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 122 group 21/22"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Ping - multipath"
+
+	#
+	# IPv4 with blackhole nexthops
+	#
+	run_cmd "$IP nexthop add id 23 blackhole"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 2 "Ping - blackhole"
+
+	run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Ping - blackhole replaced with gateway"
+
+	run_cmd "$IP nexthop replace id 23 blackhole"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 2 "Ping - gateway replaced by blackhole"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	if [ $? -eq 0 ]; then
+		run_cmd "$IP nexthop replace id 122 group 23"
+		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		log_test $? 2 "Ping - group with blackhole"
+
+		run_cmd "$IP nexthop replace id 122 group 21/22"
+		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		log_test $? 0 "Ping - group blackhole replaced with gateways"
+	else
+		log_test 2 0 "Ping - multipath failed"
+	fi
+
+	#
+	# device only and gw + dev only mix
+	#
+	run_cmd "$IP nexthop add id 85 dev veth1"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 85"
+	log_test $? 0 "IPv4 route with device only nexthop"
+	check_route "172.16.101.1" "172.16.101.1 nhid 85 dev veth1"
+
+	run_cmd "$IP nexthop add id 122 group 21/85"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+	log_test $? 0 "IPv4 multipath route with nexthop mix - dev only + gw"
+	check_route "172.16.101.1" "172.16.101.1 nhid 85 nexthop via 172.16.1.2 dev veth1 nexthop dev veth1"
+
+	#
+	# IPv4 with IPv6
+	#
+	set -e
+	lladdr=$(get_linklocal veth2 peer)
+	run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
+	set +e
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+	$IP neigh sh | grep -q "${lladdr} dev veth1"
+	if [ $? -eq 1 ]; then
+		echo "    WARNING: Neigh entry missing for ${lladdr}"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	$IP neigh sh | grep -q "172.16.101.1 dev eth1"
+	if [ $? -eq 0 ]; then
+		echo "    WARNING: Neigh entry exists for 172.16.101.1"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	set -e
+	run_cmd "$IP nexthop add id 25 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 24/25"
+	set +e
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
+	log_test $? 0 "IPv4 route with mixed v4-v6 multipath route"
+
+	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv4 route with IPv6 gateway"
+
+	$IP neigh sh | grep -q "${lladdr} dev veth1"
+	if [ $? -eq 1 ]; then
+		echo "    WARNING: Neigh entry missing for ${lladdr}"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	$IP neigh sh | grep -q "172.16.101.1 dev eth1"
+	if [ $? -eq 0 ]; then
+		echo "    WARNING: Neigh entry exists for 172.16.101.1"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	#
+	# MPLS as an example of LWT encap
+	#
+	run_cmd "$IP nexthop add id 51 encap mpls 101 via 172.16.1.2 dev veth1"
+	log_test $? 0 "IPv4 route with MPLS encap"
+	check_nexthop "id 51" "id 51 encap mpls 101 via 172.16.1.2 dev veth1 scope link"
+	log_test $? 0 "IPv4 route with MPLS encap - check"
+
+	run_cmd "$IP nexthop add id 52 encap mpls 102 via inet6 2001:db8:91::2 dev veth1"
+	log_test $? 0 "IPv4 route with MPLS encap and v6 gateway"
+	check_nexthop "id 52" "id 52 encap mpls 102 via 2001:db8:91::2 dev veth1 scope link"
+	log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
+}
+
+basic()
+{
+	echo
+	echo "Basic functional tests"
+	echo "----------------------"
+	run_cmd "$IP nexthop ls"
+	log_test $? 0 "List with nothing defined"
+
+	run_cmd "$IP nexthop get id 1"
+	log_test $? 2 "Nexthop get on non-existent id"
+
+	# attempt to create nh without a device or gw - fails
+	run_cmd "$IP nexthop add id 1"
+	log_test $? 2 "Nexthop with no device or gateway"
+
+	# attempt to create nh with down device - fails
+	$IP li set veth1 down
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	log_test $? 2 "Nexthop with down device"
+
+	# create nh with linkdown device - fails
+	$IP li set veth1 up
+	ip -netns peer li set veth2 down
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	log_test $? 2 "Nexthop with device that is linkdown"
+	ip -netns peer li set veth2 up
+
+	# device only
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	log_test $? 0 "Nexthop with device only"
+
+	# create nh with duplicate id
+	run_cmd "$IP nexthop add id 1 dev veth3"
+	log_test $? 2 "Nexthop with duplicate id"
+
+	# blackhole nexthop
+	run_cmd "$IP nexthop add id 2 blackhole"
+	log_test $? 0 "Blackhole nexthop"
+
+	# blackhole nexthop can not have other specs
+	run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
+	log_test $? 2 "Blackhole nexthop with other attributes"
+
+	#
+	# groups
+	#
+
+	run_cmd "$IP nexthop add id 101 group 1"
+	log_test $? 0 "Create group"
+
+	run_cmd "$IP nexthop add id 102 group 2"
+	log_test $? 0 "Create group with blackhole nexthop"
+
+	# multipath group can not have a blackhole as 1 path
+	run_cmd "$IP nexthop add id 103 group 1/2"
+	log_test $? 2 "Create multipath group where 1 path is a blackhole"
+
+	# multipath group can not have a member replaced by a blackhole
+	run_cmd "$IP nexthop replace id 2 dev veth3"
+	run_cmd "$IP nexthop replace id 102 group 1/2"
+	run_cmd "$IP nexthop replace id 2 blackhole"
+	log_test $? 2 "Multipath group can not have a member replaced by blackhole"
+
+	# attempt to create group with non-existent nexthop
+	run_cmd "$IP nexthop add id 103 group 12"
+	log_test $? 2 "Create group with non-existent nexthop"
+
+	# attempt to create group with same nexthop
+	run_cmd "$IP nexthop add id 103 group 1/1"
+	log_test $? 2 "Create group with same nexthop multiple times"
+
+	# replace nexthop with a group - fails
+	run_cmd "$IP nexthop replace id 2 group 1"
+	log_test $? 2 "Replace nexthop with nexthop group"
+
+	# replace nexthop group with a nexthop - fails
+	run_cmd "$IP nexthop replace id 101 dev veth1"
+	log_test $? 2 "Replace nexthop group with nexthop"
+
+	# nexthop group with other attributes fail
+	run_cmd "$IP nexthop add id 104 group 1 dev veth1"
+	log_test $? 2 "Nexthop group and device"
+
+	run_cmd "$IP nexthop add id 104 group 1 blackhole"
+	log_test $? 2 "Nexthop group and blackhole"
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $ALL_TESTS)
+        -4          IPv4 tests only
+        -6          IPv6 tests only
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+
+    Runtime test
+	-n num	    Number of nexthops to target
+	-N    	    Use new style to install routes in DUT
+
+done
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pP46hv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		4) TESTS=${IPV4_TESTS};;
+		6) TESTS=${IPV6_TESTS};;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip help 2>&1 | grep -q nexthop
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing nexthop command"
+	exit $ksft_skip
+fi
+
+out=$(ip nexthop ls 2>&1 | grep -q "Operation not supported")
+if [ $? -eq 0 ]; then
+	echo "SKIP: kernel lacks nexthop support"
+	exit $ksft_skip
+fi
+
+for t in $TESTS
+do
+	case $t in
+	none) IP="ip -netns peer"; setup; exit 0;;
+	*) setup; $t; cleanup;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
new file mode 100755
index 000000000000..e4009f658003
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
@@ -0,0 +1,305 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv4
+# GRE tunnel. The tunnel carries IPv4 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# | 192.0.3.{2-62}/24 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.3.1/24                          |
+# |                                            |
+# |  + g1 (gre)                                |
+# |    loc=192.0.2.65                          |
+# |    rem=192.0.2.66 --.                      |
+# |    tos=inherit      |                      |
+# |                     v                      |
+# |                     + $ul1                 |
+# |                     | 192.0.2.129/28       |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# | SW2                 |                      |
+# |               $ul21 +                      |
+# |      192.0.2.130/28                        |
+# |                   |                        |
+# !   ________________|_____                   |
+# |  /                      \                  |
+# |  |                      |                  |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan) |
+# |  | 192.0.2.145/28       | 192.0.2.161/28   |
+# |  |                      |                  |
+# +--|----------------------|------------------+
+#    |                      |
+# +--|----------------------|------------------+
+# |  |                      |                  |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan) |
+# |  | 192.0.2.146/28       | 192.0.2.162/28   |
+# |  |                      |                  |
+# |  \______________________/                  |
+# |                   |                        |
+# |                   |                        |
+# |               $ul31 +                      |
+# |      192.0.2.177/28 |                  SW3 |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# |                     + $ul4                 |
+# |                     ^ 192.0.2.178/28       |
+# |                     |                      |
+# |  + g2 (gre)         |                      |
+# |    loc=192.0.2.66   |                      |
+# |    rem=192.0.2.65 --'                      |
+# |    tos=inherit                             |
+# |                                            |
+# |               $ol4 +                       |
+# |       192.0.4.1/24 |                   SW4 |
+# +--------------------|-----------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |  192.0.4.{2-62}/24        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	multipath_ipv4
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.3.2/24
+	ip route add vrf v$h1 192.0.4.0/24 via 192.0.3.1
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.4.0/24 via 192.0.3.1
+	simple_if_fini $h1 192.0.3.2/24
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.3.1/24
+	__simple_if_init $ul1 v$ol1 192.0.2.129/28
+
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	ip route add vrf v$ol1 192.0.4.0/24 nexthop dev g1
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 192.0.4.0/24
+
+	ip route del vrf v$ol1 192.0.2.66/32
+	__simple_if_fini g1 192.0.2.65/32
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 192.0.2.129/28
+	simple_if_fini $ol1 192.0.3.1/24
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 192.0.2.130/28
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 192.0.2.145/28
+	vlan_create $ul22 222 v$ul21 192.0.2.161/28
+
+	ip route add vrf v$ul21 192.0.2.65/32 via 192.0.2.129
+	ip route add vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+}
+
+sw2_destroy()
+{
+	ip route del vrf v$ul21 192.0.2.66/32
+	ip route del vrf v$ul21 192.0.2.65/32
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 192.0.2.130/28
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 192.0.2.177/28
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 192.0.2.146/28
+	vlan_create $ul32 222 v$ul31 192.0.2.162/28
+
+	ip route add vrf v$ul31 192.0.2.66/32 via 192.0.2.178
+	ip route add vrf v$ul31 192.0.2.65/32 \
+	   nexthop via 192.0.2.145 \
+	   nexthop via 192.0.2.161
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip route del vrf v$ul31 192.0.2.65/32
+	ip route del vrf v$ul31 192.0.2.66/32
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 192.0.2.177/28
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 192.0.4.1/24
+	__simple_if_init $ul4 v$ol4 192.0.2.178/28
+
+	tunnel_create g2 gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 192.0.2.66/32
+	ip route add vrf v$ol4 192.0.2.65/32 via 192.0.2.177
+
+	ip route add vrf v$ol4 192.0.3.0/24 nexthop dev g2
+}
+
+sw4_destroy()
+{
+	ip route del vrf v$ol4 192.0.3.0/24
+
+	ip route del vrf v$ol4 192.0.2.65/32
+	__simple_if_fini g2 192.0.2.66/32
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 192.0.2.178/28
+	simple_if_fini $ol4 192.0.4.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.4.2/24
+	ip route add vrf v$h2 192.0.3.0/24 via 192.0.4.1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.3.0/24 via 192.0.4.1
+	simple_if_fini $h2 192.0.4.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 weight $weight1 \
+	   nexthop via 192.0.2.162 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.4.2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 over GRE over IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
new file mode 100755
index 000000000000..e449475c4d3e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv4
+# GRE tunnel. The tunnel carries IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |  2001:db8:1::2/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |  2001:db8:1::1/64                          |
+# |                                            |
+# |  + g1 (gre)                                |
+# |    loc=192.0.2.65                          |
+# |    rem=192.0.2.66 --.                      |
+# |    tos=inherit      |                      |
+# |                     v                      |
+# |                     + $ul1                 |
+# |                     | 192.0.2.129/28       |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# | SW2                 |                      |
+# |               $ul21 +                      |
+# |      192.0.2.130/28                        |
+# |                   |                        |
+# !   ________________|_____                   |
+# |  /                      \                  |
+# |  |                      |                  |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan) |
+# |  | 192.0.2.145/28       | 192.0.2.161/28   |
+# |  |                      |                  |
+# +--|----------------------|------------------+
+#    |                      |
+# +--|----------------------|------------------+
+# |  |                      |                  |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan) |
+# |  | 192.0.2.146/28       | 192.0.2.162/28   |
+# |  |                      |                  |
+# |  \______________________/                  |
+# |                   |                        |
+# |                   |                        |
+# |               $ul31 +                      |
+# |      192.0.2.177/28 |                  SW3 |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# |                     + $ul4                 |
+# |                     ^ 192.0.2.178/28       |
+# |                     |                      |
+# |  + g2 (gre)         |                      |
+# |    loc=192.0.2.66   |                      |
+# |    rem=192.0.2.65 --'                      |
+# |    tos=inherit                             |
+# |                                            |
+# |               $ol4 +                       |
+# |   2001:db8:2::1/64 |                   SW4 |
+# +--------------------|-----------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |   2001:db8:2::2/64        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv6
+	multipath_ipv6
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::2/64
+	ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+	simple_if_fini $h1 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 192.0.2.129/28
+
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+
+	ip route del vrf v$ol1 192.0.2.66/32
+	__simple_if_fini g1 192.0.2.65/32
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 192.0.2.129/28
+	simple_if_fini $ol1 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 192.0.2.130/28
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 192.0.2.145/28
+	vlan_create $ul22 222 v$ul21 192.0.2.161/28
+
+	ip route add vrf v$ul21 192.0.2.65/32 via 192.0.2.129
+	ip route add vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+}
+
+sw2_destroy()
+{
+	ip route del vrf v$ul21 192.0.2.66/32
+	ip route del vrf v$ul21 192.0.2.65/32
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 192.0.2.130/28
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 192.0.2.177/28
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 192.0.2.146/28
+	vlan_create $ul32 222 v$ul31 192.0.2.162/28
+
+	ip route add vrf v$ul31 192.0.2.66/32 via 192.0.2.178
+	ip route add vrf v$ul31 192.0.2.65/32 \
+	   nexthop via 192.0.2.145 \
+	   nexthop via 192.0.2.161
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip route del vrf v$ul31 192.0.2.65/32
+	ip route del vrf v$ul31 192.0.2.66/32
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 192.0.2.177/28
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 192.0.2.178/28
+
+	tunnel_create g2 gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 192.0.2.66/32
+	ip route add vrf v$ol4 192.0.2.65/32 via 192.0.2.177
+
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+
+	ip route del vrf v$ol4 192.0.2.65/32
+	__simple_if_fini g2 192.0.2.66/32
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 192.0.2.178/28
+	simple_if_fini $ol4 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:db8:2::2/64
+	ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	simple_if_fini $h2 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 weight $weight1 \
+	   nexthop via 192.0.2.162 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
+	       -B "2001:db8:2::2-2001:db8:2::1e" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv6()
+{
+	ping_test $h1 2001:db8:2::2
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 over GRE over IPv4 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
new file mode 100755
index 000000000000..a257979d3fc5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
@@ -0,0 +1,304 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv6
+# GRE tunnel. The tunnel carries IPv4 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# | 192.0.3.{2-62}/24 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-------------------------+
+# | SW1               |                         |
+# |              $ol1 +                         |
+# |      192.0.3.1/24                           |
+# |                                             |
+# |  + g1 (gre)                                 |
+# |    loc=2001:db8:40::1                       |
+# |    rem=2001:db8:40::2 --.                   |
+# |    tos=inherit          |                   |
+# |                         v                   |
+# |                         + $ul1              |
+# |                         | 2001:db8:80::1/64 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# | SW2                     |                   |
+# |                   $ul21 +                   |
+# |       2001:db8:80::2/64                     |
+# |                   |                         |
+# !   ________________|_____                    |
+# |  /                      \                   |
+# |  |                      |                   |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan)  |
+# |  | 2001:db8:81::1/64    | 2001:db8:82::1/64 |
+# |  |                      |                   |
+# +--|----------------------|-------------------+
+#    |                      |
+# +--|----------------------|-------------------+
+# |  |                      |                   |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan)  |
+# |  | 2001:db8:81::2/64    | 2001:db8:82::2/64 |
+# |  |                      |                   |
+# |  \______________________/                   |
+# |                   |                         |
+# |                   |                         |
+# |                   $ul31 +                   |
+# |       2001:db8:83::2/64 |               SW3 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# |                         + $ul4              |
+# |                         ^ 2001:db8:83::1/64 |
+# |  + g2 (gre)             |                   |
+# |    loc=2001:db8:40::2   |                   |
+# |    rem=2001:db8:40::1 --'                   |
+# |    tos=inherit                              |
+# |                                             |
+# |               $ol4 +                        |
+# |       192.0.4.1/24 |                    SW4 |
+# +--------------------|------------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |  192.0.4.{2-62}/24        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	multipath_ipv4
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.3.2/24
+	ip route add vrf v$h1 192.0.4.0/24 via 192.0.3.1
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.4.0/24 via 192.0.3.1
+	simple_if_fini $h1 192.0.3.2/24
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.3.1/24
+	__simple_if_init $ul1 v$ol1 2001:db8:80::1/64
+
+	tunnel_create g1 ip6gre 2001:db8:40::1 2001:db8:40::2 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 2001:db8:40::1/128
+	ip -6 route add vrf v$ol1 2001:db8:40::2/128 via 2001:db8:80::2
+
+	ip route add vrf v$ol1 192.0.4.0/24 nexthop dev g1
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 192.0.4.0/24
+
+	ip -6 route del vrf v$ol1 2001:db8:40::2/128
+	__simple_if_fini g1 2001:db8:40::1/128
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 2001:db8:80::1/64
+	simple_if_fini $ol1 192.0.3.1/24
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 2001:db8:80::2/64
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 2001:db8:81::1/64
+	vlan_create $ul22 222 v$ul21 2001:db8:82::1/64
+
+	ip -6 route add vrf v$ul21 2001:db8:40::1/128 via 2001:db8:80::1
+	ip -6 route add vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$ul21 2001:db8:40::2/128
+	ip -6 route del vrf v$ul21 2001:db8:40::1/128
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 2001:db8:80::2/64
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 2001:db8:83::2/64
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 2001:db8:81::2/64
+	vlan_create $ul32 222 v$ul31 2001:db8:82::2/64
+
+	ip -6 route add vrf v$ul31 2001:db8:40::2/128 via 2001:db8:83::1
+	ip -6 route add vrf v$ul31 2001:db8:40::1/128 \
+	   nexthop via 2001:db8:81::1 \
+	   nexthop via 2001:db8:82::1
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip -6 route del vrf v$ul31 2001:db8:40::1/128
+	ip -6 route del vrf v$ul31 2001:db8:40::2/128
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 2001:Db8:83::2/64
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 192.0.4.1/24
+	__simple_if_init $ul4 v$ol4 2001:db8:83::1/64
+
+	tunnel_create g2 ip6gre 2001:db8:40::2 2001:db8:40::1 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 2001:db8:40::2/128
+	ip -6 route add vrf v$ol4 2001:db8:40::1/128 via 2001:db8:83::2
+
+	ip route add vrf v$ol4 192.0.3.0/24 nexthop dev g2
+}
+
+sw4_destroy()
+{
+	ip route del vrf v$ol4 192.0.3.0/24
+
+	ip -6 route del vrf v$ol4 2001:db8:40::1/128
+	__simple_if_fini g2 2001:db8:40::2/128
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 2001:db8:83::1/64
+	simple_if_fini $ol4 192.0.4.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.4.2/24
+	ip route add vrf v$h2 192.0.3.0/24 via 192.0.4.1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.3.0/24 via 192.0.4.1
+	simple_if_fini $h2 192.0.4.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 weight $weight1 \
+	   nexthop via 2001:db8:82::2 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.4.2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 over GRE over IPv6 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
new file mode 100755
index 000000000000..d208f5243ade
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
@@ -0,0 +1,305 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv6
+# GRE tunnel. The tunnel carries IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |  2001:db8:1::2/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-------------------------+
+# | SW1               |                         |
+# |              $ol1 +                         |
+# |  2001:db8:1::1/64                           |
+# |                                             |
+# |  + g1 (gre)                                 |
+# |    loc=2001:db8:40::1                       |
+# |    rem=2001:db8:40::2 --.                   |
+# |    tos=inherit          |                   |
+# |                         v                   |
+# |                         + $ul1              |
+# |                         | 2001:db8:80::1/64 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# | SW2                     |                   |
+# |                   $ul21 +                   |
+# |       2001:db8:80::2/64                     |
+# |                   |                         |
+# !   ________________|_____                    |
+# |  /                      \                   |
+# |  |                      |                   |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan)  |
+# |  | 2001:db8:81::1/64    | 2001:db8:82::1/64 |
+# |  |                      |                   |
+# +--|----------------------|-------------------+
+#    |                      |
+# +--|----------------------|-------------------+
+# |  |                      |                   |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan)  |
+# |  | 2001:db8:81::2/64    | 2001:db8:82::2/64 |
+# |  |                      |                   |
+# |  \______________________/                   |
+# |                   |                         |
+# |                   |                         |
+# |                   $ul31 +                   |
+# |       2001:db8:83::2/64 |               SW3 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# |                         + $ul4              |
+# |                         ^ 2001:db8:83::1/64 |
+# |  + g2 (gre)             |                   |
+# |    loc=2001:db8:40::2   |                   |
+# |    rem=2001:db8:40::1 --'                   |
+# |    tos=inherit                              |
+# |                                             |
+# |               $ol4 +                        |
+# |   2001:db8:2::1/64 |                    SW4 |
+# +--------------------|------------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |   2001:db8:2::2/64        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv6
+	multipath_ipv6
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::2/64
+	ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+	simple_if_fini $h1 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 2001:db8:80::1/64
+
+	tunnel_create g1 ip6gre 2001:db8:40::1 2001:db8:40::2 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 2001:db8:40::1/128
+	ip -6 route add vrf v$ol1 2001:db8:40::2/128 via 2001:db8:80::2
+
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+
+	ip -6 route del vrf v$ol1 2001:db8:40::2/128
+	__simple_if_fini g1 2001:db8:40::1/128
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 2001:db8:80::1/64
+	simple_if_fini $ol1 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 2001:db8:80::2/64
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 2001:db8:81::1/64
+	vlan_create $ul22 222 v$ul21 2001:db8:82::1/64
+
+	ip -6 route add vrf v$ul21 2001:db8:40::1/128 via 2001:db8:80::1
+	ip -6 route add vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$ul21 2001:db8:40::2/128
+	ip -6 route del vrf v$ul21 2001:db8:40::1/128
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 2001:db8:80::2/64
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 2001:db8:83::2/64
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 2001:db8:81::2/64
+	vlan_create $ul32 222 v$ul31 2001:db8:82::2/64
+
+	ip -6 route add vrf v$ul31 2001:db8:40::2/128 via 2001:db8:83::1
+	ip -6 route add vrf v$ul31 2001:db8:40::1/128 \
+	   nexthop via 2001:db8:81::1 \
+	   nexthop via 2001:db8:82::1
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip -6 route del vrf v$ul31 2001:db8:40::1/128
+	ip -6 route del vrf v$ul31 2001:db8:40::2/128
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 2001:Db8:83::2/64
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 2001:db8:83::1/64
+
+	tunnel_create g2 ip6gre 2001:db8:40::2 2001:db8:40::1 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 2001:db8:40::2/128
+	ip -6 route add vrf v$ol4 2001:db8:40::1/128 via 2001:db8:83::2
+
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+
+	ip -6 route del vrf v$ol4 2001:db8:40::1/128
+	__simple_if_fini g2 2001:db8:40::2/128
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 2001:db8:83::1/64
+	simple_if_fini $ol4 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:db8:2::2/64
+	ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	simple_if_fini $h2 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 weight $weight1 \
+	   nexthop via 2001:db8:82::2 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
+	       -B "2001:db8:2::2-2001:db8:2::1e" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv6()
+{
+	ping_test $h1 2001:db8:2::2
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 over GRE over IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
new file mode 100755
index 000000000000..cf3d26c233e8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -0,0 +1,359 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+	vrf_create "vrf-r1"
+	ip link set dev $rp11 master vrf-r1
+	ip link set dev $rp12 master vrf-r1
+	ip link set dev $rp13 master vrf-r1
+
+	ip link set dev vrf-r1 up
+	ip link set dev $rp11 up
+	ip link set dev $rp12 up
+	ip link set dev $rp13 up
+
+	ip address add 192.0.2.1/24 dev $rp11
+	ip address add 2001:db8:1::1/64 dev $rp11
+
+	ip address add 169.254.2.12/24 dev $rp12
+	ip address add fe80:2::12/64 dev $rp12
+
+	ip address add 169.254.3.13/24 dev $rp13
+	ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-r1
+	ip route del 198.51.100.0/24 vrf vrf-r1
+
+	ip address del fe80:3::13/64 dev $rp13
+	ip address del 169.254.3.13/24 dev $rp13
+
+	ip address del fe80:2::12/64 dev $rp12
+	ip address del 169.254.2.12/24 dev $rp12
+
+	ip address del 2001:db8:1::1/64 dev $rp11
+	ip address del 192.0.2.1/24 dev $rp11
+
+	ip nexthop del id 103
+	ip nexthop del id 101
+	ip nexthop del id 102
+	ip nexthop del id 106
+	ip nexthop del id 104
+	ip nexthop del id 105
+
+	ip link set dev $rp13 down
+	ip link set dev $rp12 down
+	ip link set dev $rp11 down
+
+	vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+	vrf_create "vrf-r2"
+	ip link set dev $rp21 master vrf-r2
+	ip link set dev $rp22 master vrf-r2
+	ip link set dev $rp23 master vrf-r2
+
+	ip link set dev vrf-r2 up
+	ip link set dev $rp21 up
+	ip link set dev $rp22 up
+	ip link set dev $rp23 up
+
+	ip address add 198.51.100.1/24 dev $rp21
+	ip address add 2001:db8:2::1/64 dev $rp21
+
+	ip address add 169.254.2.22/24 dev $rp22
+	ip address add fe80:2::22/64 dev $rp22
+
+	ip address add 169.254.3.23/24 dev $rp23
+	ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-r2
+	ip route del 192.0.2.0/24 vrf vrf-r2
+
+	ip address del fe80:3::23/64 dev $rp23
+	ip address del 169.254.3.23/24 dev $rp23
+
+	ip address del fe80:2::22/64 dev $rp22
+	ip address del 169.254.2.22/24 dev $rp22
+
+	ip address del 2001:db8:2::1/64 dev $rp21
+	ip address del 198.51.100.1/24 dev $rp21
+
+	ip nexthop del id 201
+	ip nexthop del id 202
+	ip nexthop del id 204
+	ip nexthop del id 205
+
+	ip link set dev $rp23 down
+	ip link set dev $rp22 down
+	ip link set dev $rp21 down
+
+	vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+	ip nexthop add id 101 via 169.254.2.22 dev $rp12
+	ip nexthop add id 102 via 169.254.3.23 dev $rp13
+	ip nexthop add id 103 group 101/102
+	ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+	ip nexthop add id 104 via fe80:2::22 dev $rp12
+	ip nexthop add id 105 via fe80:3::23 dev $rp13
+	ip nexthop add id 106 group 104/105
+	ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+	ip nexthop add id 201 via 169.254.2.12 dev $rp22
+	ip nexthop add id 202 via 169.254.3.13 dev $rp23
+	ip nexthop add id 203 group 201/202
+	ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+	ip nexthop add id 204 via fe80:2::12 dev $rp22
+	ip nexthop add id 205 via fe80:3::13 dev $rp23
+	ip nexthop add id 206 group 204/205
+	ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the configured weights.
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight_rp12/102,$weight_rp13
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	# Restore settings.
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the configured weights.
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+	ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	ip nexthop replace id 106 group 104/105
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	ip nexthop replace id 106 group 104/105
+}
+
+multipath_test()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp11=${NETIFS[p2]}
+
+	rp12=${NETIFS[p3]}
+	rp22=${NETIFS[p4]}
+
+	rp13=${NETIFS[p5]}
+	rp23=${NETIFS[p6]}
+
+	rp21=${NETIFS[p7]}
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router1_create
+	router2_create
+	routing_nh_obj
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Nexthop objects not supported; skipping tests"
+	exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 124803eea4a9..058c746ee300 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
 	match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \
-	match_ip_tos_test"
+	match_ip_tos_test match_indev_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -310,6 +310,30 @@ match_ip_tos_test()
 	log_test "ip_tos match ($tcflags)"
 }
 
+match_indev_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags indev $h1 dst_mac $h2mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags indev $h2 dst_mac $h2mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "indev match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_router.sh b/tools/testing/selftests/net/forwarding/tc_flower_router.sh
new file mode 100755
index 000000000000..4aee9c9e69f6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_router.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_indev_egress_test"
+NUM_NETIFS=6
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+
+	ip route add 192.0.2.0/24 vrf v$h1 nexthop via 192.0.1.2
+	ip route add 192.0.3.0/24 vrf v$h1 nexthop via 192.0.1.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.3.0/24 vrf v$h1
+	ip route del 192.0.2.0/24 vrf v$h1
+
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.1/24
+
+	ip route add 192.0.1.0/24 vrf v$h2 nexthop via 192.0.2.2
+	ip route add 192.0.3.0/24 vrf v$h2 nexthop via 192.0.2.2
+}
+
+h2_destroy()
+{
+	ip route del 192.0.3.0/24 vrf v$h2
+	ip route del 192.0.1.0/24 vrf v$h2
+
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.3.1/24
+
+	ip route add 192.0.1.0/24 vrf v$h3 nexthop via 192.0.3.2
+	ip route add 192.0.2.0/24 vrf v$h3 nexthop via 192.0.3.2
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/24 vrf v$h3
+	ip route del 192.0.1.0/24 vrf v$h3
+
+	simple_if_fini $h3 192.0.3.1/24
+}
+
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+	ip link set dev $rp3 up
+
+	tc qdisc add dev $rp3 clsact
+
+	ip address add 192.0.1.2/24 dev $rp1
+	ip address add 192.0.2.2/24 dev $rp2
+	ip address add 192.0.3.2/24 dev $rp3
+}
+
+router_destroy()
+{
+	ip address del 192.0.3.2/24 dev $rp3
+	ip address del 192.0.2.2/24 dev $rp2
+	ip address del 192.0.1.2/24 dev $rp1
+
+	tc qdisc del dev $rp3 clsact
+
+	ip link set dev $rp3 down
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+match_indev_egress_test()
+{
+	RET=0
+
+	tc filter add dev $rp3 egress protocol ip pref 1 handle 101 flower \
+		$tcflags indev $rp1 dst_ip 192.0.3.1 action drop
+	tc filter add dev $rp3 egress protocol ip pref 2 handle 102 flower \
+		$tcflags indev $rp2 dst_ip 192.0.3.1 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac -A 192.0.1.1 -B 192.0.3.1 \
+		-t ip -q
+
+	tc_check_packets "dev $rp3 egress" 102 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $rp3 egress" 101 1
+	check_err $? "Did not match on correct filter"
+
+	$MZ $h2 -c 1 -p 64 -a $h2mac -b $rp2mac -A 192.0.2.1 -B 192.0.3.1 \
+		-t ip -q
+
+	tc_check_packets "dev $rp3 egress" 101 2
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $rp3 egress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $rp3 egress protocol ip pref 2 handle 102 flower
+	tc filter del dev $rp3 egress protocol ip pref 1 handle 101 flower
+
+	log_test "indev egress match ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	h2=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	h3=${NETIFS[p5]}
+	rp3=${NETIFS[p6]}
+
+	h1mac=$(mac_get $h1)
+	rp1mac=$(mac_get $rp1)
+	h2mac=$(mac_get $h2)
+	rp2mac=$(mac_get $rp2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index 9826a446e2c0..772e00ac3230 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="shared_block_test"
+ALL_TESTS="shared_block_test match_indev_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -70,6 +70,33 @@ shared_block_test()
 	log_test "shared block ($tcflags)"
 }
 
+match_indev_test()
+{
+	RET=0
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		$tcflags indev $swp1 dst_mac $swmac action drop
+	tc filter add block 22 protocol ip pref 2 handle 102 flower \
+		$tcflags indev $swp2 dst_mac $swmac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 101 1
+	check_err $? "Did not match first incoming packet on a block"
+
+	$MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 102 1
+	check_err $? "Did not match second incoming packet on a block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+	tc filter del block 22 protocol ip pref 2 handle 102 flower
+
+	log_test "indev match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
new file mode 100755
index 000000000000..18c5de53558a
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -0,0 +1,534 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# redirect test
+#
+#                     .253 +----+
+#                     +----| r1 |
+#                     |    +----+
+# +----+              |       |.1
+# | h1 |--------------+       |   10.1.1.0/30 2001:db8:1::0/126
+# +----+ .1           |       |.2
+#         172.16.1/24 |    +----+                   +----+
+#    2001:db8:16:1/64 +----| r2 |-------------------| h2 |
+#                     .254 +----+ .254           .2 +----+
+#                                    172.16.2/24
+#                                  2001:db8:16:2/64
+#
+# Route from h1 to h2 goes through r1, eth1 - connection between r1 and r2.
+# Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent
+# from r1 to h1 telling h1 to use r2 when talking to h2.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+R1_R2_N1_IP=10.1.1.1
+R2_R1_N1_IP=10.1.1.2
+
+R1_R2_N1_IP6=2001:db8:1::1
+R2_R1_N1_IP6=2001:db8:1::2
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+H2_N2_IP=172.16.2.2
+R2_N2_IP=172.16.2.254
+H2_N2_IP6=2001:db8:16:2::2
+R2_N2_IP6=2001:db8:16:2::254
+
+VRF=red
+VRF_TABLE=1111
+
+################################################################################
+# helpers
+
+log_section()
+{
+	echo
+	echo "###########################################################################"
+	echo "$*"
+	echo "###########################################################################"
+	echo
+}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+log_debug()
+{
+	if [ "$VERBOSE" = "1" ]; then
+		echo "$*"
+	fi
+}
+
+run_cmd()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "$out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+get_linklocal()
+{
+	local ns=$1
+	local dev=$2
+	local addr
+
+	addr=$(ip -netns $ns -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+	local ns
+
+	for ns in h1 h2 r1 r2; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+create_vrf()
+{
+	local ns=$1
+
+	ip -netns ${ns} link add ${VRF} type vrf table ${VRF_TABLE}
+	ip -netns ${ns} link set ${VRF} up
+	ip -netns ${ns} route add vrf ${VRF} unreachable default metric 8192
+	ip -netns ${ns} -6 route add vrf ${VRF} unreachable default metric 8192
+
+	ip -netns ${ns} addr add 127.0.0.1/8 dev ${VRF}
+	ip -netns ${ns} -6 addr add ::1 dev ${VRF} nodad
+
+	ip -netns ${ns} ru del pref 0
+	ip -netns ${ns} ru add pref 32765 from all lookup local
+	ip -netns ${ns} -6 ru del pref 0
+	ip -netns ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+setup()
+{
+	local ns
+
+	#
+	# create nodes as namespaces
+	#
+	for ns in h1 h2 r1 r2; do
+		ip netns add $ns
+		ip -netns $ns li set lo up
+
+		case "${ns}" in
+		h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+			;;
+		r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+		       ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
+		esac
+	done
+
+	#
+	# create interconnects
+	#
+	ip -netns h1 li add eth0 type veth peer name r1h1
+	ip -netns h1 li set r1h1 netns r1 name eth0 up
+
+	ip -netns h1 li add eth1 type veth peer name r2h1
+	ip -netns h1 li set r2h1 netns r2 name eth0 up
+
+	ip -netns h2 li add eth0 type veth peer name r2h2
+	ip -netns h2 li set eth0 up
+	ip -netns h2 li set r2h2 netns r2 name eth2 up
+
+	ip -netns r1 li add eth1 type veth peer name r2r1
+	ip -netns r1 li set eth1 up
+	ip -netns r1 li set r2r1 netns r2 name eth1 up
+
+	#
+	# h1
+	#
+	if [ "${WITH_VRF}" = "yes" ]; then
+		create_vrf "h1"
+		H1_VRF_ARG="vrf ${VRF}"
+		H1_PING_ARG="-I ${VRF}"
+	else
+		H1_VRF_ARG=
+		H1_PING_ARG=
+	fi
+	ip -netns h1 li add br0 type bridge
+	if [ "${WITH_VRF}" = "yes" ]; then
+		ip -netns h1 li set br0 vrf ${VRF} up
+	else
+		ip -netns h1 li set br0 up
+	fi
+	ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+	ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+	ip -netns h1 li set eth0 master br0 up
+	ip -netns h1 li set eth1 master br0 up
+
+	#
+	# h2
+	#
+	ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+	ip -netns h2 ro add default via ${R2_N2_IP} dev eth0
+	ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+	ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0
+
+	#
+	# r1
+	#
+	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30
+	ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
+
+	#
+	# r2
+	#
+	ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+	ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+	ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30
+	ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
+	ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24
+	ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
+
+	sleep 2
+
+	R1_LLADDR=$(get_linklocal r1 eth0)
+	if [ $? -ne 0 ]; then
+		echo "Error: Failed to get link-local address of r1's eth0"
+		exit 1
+	fi
+	log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}"
+
+	R2_LLADDR=$(get_linklocal r2 eth0)
+	if [ $? -ne 0 ]; then
+		echo "Error: Failed to get link-local address of r2's eth0"
+		exit 1
+	fi
+	log_debug "initial gateway is R2's lladdr = ${R2_LLADDR}"
+}
+
+change_h2_mtu()
+{
+	local mtu=$1
+
+	run_cmd ip -netns h2 li set eth0 mtu ${mtu}
+	run_cmd ip -netns r2 li set eth2 mtu ${mtu}
+}
+
+check_exception()
+{
+	local mtu="$1"
+	local with_redirect="$2"
+	local desc="$3"
+
+	# From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102)
+	if [ "$VERBOSE" = "1" ]; then
+		echo "Commands to check for exception:"
+		run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
+		run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
+	fi
+
+	if [ -n "${mtu}" ]; then
+		mtu=" mtu ${mtu}"
+	fi
+	if [ "$with_redirect" = "yes" ]; then
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -q "cache <redirected> expires [0-9]*sec${mtu}"
+	elif [ -n "${mtu}" ]; then
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -q "cache expires [0-9]*sec${mtu}"
+	else
+		# want to verify that neither mtu nor redirected appears in
+		# the route get output. The -v will wipe out the cache line
+		# if either are set so the last grep -q will not find a match
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -E -v 'mtu|redirected' | grep -q "cache"
+	fi
+	log_test $? 0 "IPv4: ${desc}"
+
+	if [ "$with_redirect" = "yes" ]; then
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+	elif [ -n "${mtu}" ]; then
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -q "${mtu}"
+	else
+		# IPv6 is a bit harder. First strip out the match if it
+		# contains an mtu exception and then look for the first
+		# gateway - R1's lladdr
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -v "mtu" | grep -q "${R1_LLADDR}"
+	fi
+	log_test $? 0 "IPv6: ${desc}"
+}
+
+run_ping()
+{
+	local sz=$1
+
+	run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+	run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+}
+
+replace_route_new()
+{
+	# r1 to h2 via r2 and eth0
+	run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
+	run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
+}
+
+reset_route_new()
+{
+	run_cmd ip -netns r1 nexthop flush
+	run_cmd ip -netns h1 nexthop flush
+
+	initial_route_new
+}
+
+initial_route_new()
+{
+	# r1 to h2 via r2 and eth1
+	run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
+	run_cmd ip -netns r1 ro add ${H2_N2} nhid 1
+
+	run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
+	run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2
+
+	# h1 to h2 via r1
+	run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0
+	run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
+
+	run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0
+	run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
+}
+
+replace_route_legacy()
+{
+	# r1 to h2 via r2 and eth0
+	run_cmd ip -netns r1    ro replace ${H2_N2}   via ${R2_N1_IP}  dev eth0
+	run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
+}
+
+reset_route_legacy()
+{
+	run_cmd ip -netns r1    ro del ${H2_N2}
+	run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+
+	run_cmd ip -netns h1    ro del ${H1_VRF_ARG} ${H2_N2}
+	run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
+
+	initial_route_legacy
+}
+
+initial_route_legacy()
+{
+	# r1 to h2 via r2 and eth1
+	run_cmd ip -netns r1    ro add ${H2_N2}   via ${R2_R1_N1_IP}  dev eth1
+	run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
+
+	# h1 to h2 via r1
+	# - IPv6 redirect only works if gateway is the LLA
+	run_cmd ip -netns h1    ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+	run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
+}
+
+check_connectivity()
+{
+	local rc
+
+	run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
+	rc=$?
+	run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
+	[ $? -ne 0 ] && rc=$?
+
+	return $rc
+}
+
+do_test()
+{
+	local ttype="$1"
+
+	eval initial_route_${ttype}
+
+	# verify connectivity
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken"
+		ret=1
+		return
+	fi
+
+	# redirect exception followed by mtu
+	eval replace_route_${ttype}
+	run_ping 64
+	check_exception "" "yes" "redirect exception"
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after redirect"
+		ret=1
+		return
+	fi
+
+	change_h2_mtu 1300
+	run_ping 1350
+	check_exception "1300" "yes" "redirect exception plus mtu"
+
+	# remove exceptions and restore routing
+	change_h2_mtu 1500
+	eval reset_route_${ttype}
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after reset"
+		ret=1
+		return
+	fi
+	check_exception "" "no" "routing reset"
+
+	# MTU exception followed by redirect
+	change_h2_mtu 1300
+	run_ping 1350
+	check_exception "1300" "no" "mtu exception"
+
+	eval replace_route_${ttype}
+	run_ping 64
+	check_exception "1300" "yes" "mtu exception plus redirect"
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after redirect"
+		ret=1
+		return
+	fi
+}
+
+################################################################################
+# usage
+
+usage()
+{
+        cat <<EOF
+usage: ${0##*/} OPTS
+
+	-p          Pause on fail
+	-v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :pv o
+do
+	case $o in
+                p) PAUSE_ON_FAIL=yes;;
+                v) VERBOSE=$(($VERBOSE + 1));;
+                *) usage; exit 1;;
+	esac
+done
+
+trap cleanup EXIT
+
+cleanup
+WITH_VRF=no
+setup
+
+log_section "Legacy routing"
+do_test "legacy"
+
+cleanup
+log_section "Legacy routing with VRF"
+WITH_VRF=yes
+setup
+do_test "legacy"
+
+cleanup
+log_section "Routing with nexthop objects"
+ip nexthop ls >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+	WITH_VRF=no
+	setup
+	do_test "new"
+
+	cleanup
+	log_section "Routing with nexthop objects and VRF"
+	WITH_VRF=yes
+	setup
+	do_test "new"
+else
+	echo "Nexthop objects not supported; skipping tests"
+fi
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c
new file mode 100644
index 000000000000..a7c41375374f
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO cmsg on send and recv */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWINFO
+#define IPV6_FLOWINFO 11
+#endif
+
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR 32
+#endif
+
+#define FLOWLABEL_WILDCARD	((uint32_t) -1)
+
+static const char cfg_data[]	= "a";
+static uint32_t cfg_label	= 1;
+
+static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel)
+{
+	char control[CMSG_SPACE(sizeof(flowlabel))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	int ret;
+
+	iov.iov_base = (char *)cfg_data;
+	iov.iov_len = sizeof(cfg_data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	if (with_flowlabel) {
+		struct cmsghdr *cm;
+
+		cm = (void *)control;
+		cm->cmsg_len = CMSG_LEN(sizeof(flowlabel));
+		cm->cmsg_level = SOL_IPV6;
+		cm->cmsg_type = IPV6_FLOWINFO;
+		*(uint32_t *)CMSG_DATA(cm) = htonl(flowlabel);
+
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+	}
+
+	ret = sendmsg(fd, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "send");
+
+	if (with_flowlabel)
+		fprintf(stderr, "sent with label %u\n", flowlabel);
+	else
+		fprintf(stderr, "sent without label\n");
+}
+
+static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
+{
+	char control[CMSG_SPACE(sizeof(expect))];
+	char data[sizeof(cfg_data)];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+	uint32_t flowlabel;
+	int ret;
+
+	iov.iov_base = data;
+	iov.iov_len = sizeof(data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	memset(control, 0, sizeof(control));
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+		error(1, 0, "recv: truncated");
+	if (ret != sizeof(cfg_data))
+		error(1, 0, "recv: length mismatch");
+	if (memcmp(data, cfg_data, sizeof(data)))
+		error(1, 0, "recv: data mismatch");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (with_flowlabel) {
+		if (!cm)
+			error(1, 0, "recv: missing cmsg");
+		if (CMSG_NXTHDR(&msg, cm))
+			error(1, 0, "recv: too many cmsg");
+		if (cm->cmsg_level != SOL_IPV6 ||
+		    cm->cmsg_type != IPV6_FLOWINFO)
+			error(1, 0, "recv: unexpected cmsg level or type");
+
+		flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm));
+		fprintf(stderr, "recv with label %u\n", flowlabel);
+
+		if (expect != FLOWLABEL_WILDCARD && expect != flowlabel)
+			fprintf(stderr, "recv: incorrect flowlabel %u != %u\n",
+					flowlabel, expect);
+
+	} else {
+		fprintf(stderr, "recv without label\n");
+	}
+}
+
+static bool get_autoflowlabel_enabled(void)
+{
+	int fd, ret;
+	char val;
+
+	fd = open("/proc/sys/net/ipv6/auto_flowlabels", O_RDONLY);
+	if (fd == -1)
+		error(1, errno, "open sysctl");
+
+	ret = read(fd, &val, 1);
+	if (ret == -1)
+		error(1, errno, "read sysctl");
+	if (ret == 0)
+		error(1, 0, "read sysctl: 0");
+
+	if (close(fd))
+		error(1, errno, "close sysctl");
+
+	return val == '1';
+}
+
+static void flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_GET,
+		.flr_label = htonl(label),
+		.flr_flags = flags,
+		.flr_share = share,
+	};
+
+	/* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+	req.flr_dst.s6_addr[0] = 0xfd;
+	req.flr_dst.s6_addr[15] = 0x1;
+
+	if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req)))
+		error(1, errno, "setsockopt flowlabel get");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "l:")) != -1) {
+		switch (c) {
+		case 'l':
+			cfg_label = strtoul(optarg, NULL, 0);
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct sockaddr_in6 addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(8000),
+		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
+	};
+	const int one = 1;
+	int fdt, fdr;
+
+	parse_opts(argc, argv);
+
+	fdt = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	fdr = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	if (connect(fdt, (void *)&addr, sizeof(addr)))
+		error(1, errno, "connect");
+	if (bind(fdr, (void *)&addr, sizeof(addr)))
+		error(1, errno, "bind");
+
+	flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE);
+
+	if (setsockopt(fdr, SOL_IPV6, IPV6_FLOWINFO, &one, sizeof(one)))
+		error(1, errno, "setsockopt flowinfo");
+
+	if (get_autoflowlabel_enabled()) {
+		fprintf(stderr, "send no label: recv auto flowlabel\n");
+		do_send(fdt, false, 0);
+		do_recv(fdr, true, FLOWLABEL_WILDCARD);
+	} else {
+		fprintf(stderr, "send no label: recv no label (auto off)\n");
+		do_send(fdt, false, 0);
+		do_recv(fdr, false, 0);
+	}
+
+	fprintf(stderr, "send label\n");
+	do_send(fdt, true, cfg_label);
+	do_recv(fdr, true, cfg_label);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdt))
+		error(1, errno, "close t");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh
new file mode 100755
index 000000000000..d3bc6442704e
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for IPv6 flowlabels
+#
+# run in separate namespaces to avoid mgmt db conflicts betweent tests
+
+set -e
+
+echo "TEST management"
+./in_netns.sh ./ipv6_flowlabel_mgr
+
+echo "TEST datapath"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=0 && ./ipv6_flowlabel -l 1'
+
+echo "TEST datapath (with auto-flowlabels)"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1'
+
+echo OK. All tests passed
diff --git a/tools/testing/selftests/net/ipv6_flowlabel_mgr.c b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
new file mode 100644
index 000000000000..af95b48acea9
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO_MGR */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR	32
+#endif
+
+/* from net/ipv6/ip6_flowlabel.c */
+#define FL_MIN_LINGER		6
+
+#define explain(x)							\
+	do { if (cfg_verbose) fprintf(stderr, "       " x "\n"); } while (0)
+
+#define __expect(x)							\
+	do {								\
+		if (!(x))						\
+			fprintf(stderr, "[OK]   " #x "\n");		\
+		else							\
+			error(1, 0, "[ERR]  " #x " (line %d)", __LINE__); \
+	} while (0)
+
+#define expect_pass(x)	__expect(x)
+#define expect_fail(x)	__expect(!(x))
+
+static bool cfg_long_running;
+static bool cfg_verbose;
+
+static int flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_GET,
+		.flr_label = htonl(label),
+		.flr_flags = flags,
+		.flr_share = share,
+	};
+
+	/* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+	req.flr_dst.s6_addr[0] = 0xfd;
+	req.flr_dst.s6_addr[15] = 0x1;
+
+	return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static int flowlabel_put(int fd, uint32_t label)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_PUT,
+		.flr_label = htonl(label),
+	};
+
+	return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static void run_tests(int fd)
+{
+	int wstatus;
+	pid_t pid;
+
+	explain("cannot get non-existent label");
+	expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+
+	explain("cannot put non-existent label");
+	expect_fail(flowlabel_put(fd, 1));
+
+	explain("cannot create label greater than 20 bits");
+	expect_fail(flowlabel_get(fd, 0x1FFFFF, IPV6_FL_S_ANY,
+				  IPV6_FL_F_CREATE));
+
+	explain("create a new label (FL_F_CREATE)");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+	explain("can get the label (without FL_F_CREATE)");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+	explain("can get it again with create flag set, too");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+	explain("cannot get it again with the exclusive (FL_FL_EXCL) flag");
+	expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY,
+					 IPV6_FL_F_CREATE | IPV6_FL_F_EXCL));
+	explain("can now put exactly three references");
+	expect_pass(flowlabel_put(fd, 1));
+	expect_pass(flowlabel_put(fd, 1));
+	expect_pass(flowlabel_put(fd, 1));
+	expect_fail(flowlabel_put(fd, 1));
+
+	explain("create a new exclusive label (FL_S_EXCL)");
+	expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+	explain("cannot get it again in non-exclusive mode");
+	expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY,  IPV6_FL_F_CREATE));
+	explain("cannot get it again in exclusive mode either");
+	expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+	expect_pass(flowlabel_put(fd, 2));
+
+	if (cfg_long_running) {
+		explain("cannot reuse the label, due to linger");
+		expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+					  IPV6_FL_F_CREATE));
+		explain("after sleep, can reuse");
+		sleep(FL_MIN_LINGER * 2 + 1);
+		expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+					  IPV6_FL_F_CREATE));
+	}
+
+	explain("create a new user-private label (FL_S_USER)");
+	expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, IPV6_FL_F_CREATE));
+	explain("cannot get it again in non-exclusive mode");
+	expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_ANY, 0));
+	explain("cannot get it again in exclusive mode");
+	expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_EXCL, 0));
+	explain("can get it again in user mode");
+	expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+	explain("child process can get it too, but not after setuid(nobody)");
+	pid = fork();
+	if (pid == -1)
+		error(1, errno, "fork");
+	if (!pid) {
+		expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+		if (setuid(USHRT_MAX))
+			fprintf(stderr, "[INFO] skip setuid child test\n");
+		else
+			expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+		exit(0);
+	}
+	if (wait(&wstatus) == -1)
+		error(1, errno, "wait");
+	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+		error(1, errno, "wait: unexpected child result");
+
+	explain("create a new process-private label (FL_S_PROCESS)");
+	expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, IPV6_FL_F_CREATE));
+	explain("can get it again");
+	expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+	explain("child process cannot can get it");
+	pid = fork();
+	if (pid == -1)
+		error(1, errno, "fork");
+	if (!pid) {
+		expect_fail(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+		exit(0);
+	}
+	if (wait(&wstatus) == -1)
+		error(1, errno, "wait");
+	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+		error(1, errno, "wait: unexpected child result");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "lv")) != -1) {
+		switch (c) {
+		case 'l':
+			cfg_long_running = true;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	parse_opts(argc, argv);
+
+	fd = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	run_tests(fd);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 317dafcd605d..ab367e75f095 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -111,6 +111,14 @@
 #
 # - cleanup_ipv6_exception
 #	Same as above, but use IPv6 transport from A to B
+#
+# - list_flush_ipv4_exception
+#	Using the same topology as in pmtu_ipv4, create exceptions, and check
+#	they are shown when listing exception caches, gone after flushing them
+#
+# - list_flush_ipv6_exception
+#	Using the same topology as in pmtu_ipv6, create exceptions, and check
+#	they are shown when listing exception caches, gone after flushing them
 
 
 # Kselftest framework requirement - SKIP code is 4.
@@ -123,39 +131,42 @@ TRACING=0
 # Some systems don't have a ping6 binary anymore
 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 
+#               Name                          Description                  re-run with nh
 tests="
-	pmtu_ipv4_exception		ipv4: PMTU exceptions
-	pmtu_ipv6_exception		ipv6: PMTU exceptions
-	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions
-	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions
-	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions
-	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions
-	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions
-	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions
-	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions
-	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions
-	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions
-	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions
-	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions
-	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions
-	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions
-	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions
-	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions
-	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions
-	pmtu_vti6_exception		vti6: PMTU exceptions
-	pmtu_vti4_exception		vti4: PMTU exceptions
-	pmtu_vti4_default_mtu		vti4: default MTU assignment
-	pmtu_vti6_default_mtu		vti6: default MTU assignment
-	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
-	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
-	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes
-	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions
-	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions"
-
-NS_A="ns-$(mktemp -u XXXXXX)"
-NS_B="ns-$(mktemp -u XXXXXX)"
-NS_R1="ns-$(mktemp -u XXXXXX)"
-NS_R2="ns-$(mktemp -u XXXXXX)"
+	pmtu_ipv4_exception		ipv4: PMTU exceptions			1
+	pmtu_ipv6_exception		ipv6: PMTU exceptions			1
+	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
+	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
+	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
+	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions	1
+	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions	1
+	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions	1
+	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions	1
+	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions	1
+	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions		1
+	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions		1
+	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions		1
+	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions		1
+	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions		1
+	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions		1
+	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions		1
+	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions		1
+	pmtu_vti6_exception		vti6: PMTU exceptions			0
+	pmtu_vti4_exception		vti4: PMTU exceptions			0
+	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
+	pmtu_vti6_default_mtu		vti6: default MTU assignment		0
+	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation	0
+	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation	0
+	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
+	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
+	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1
+	list_flush_ipv4_exception	ipv4: list and flush cached exceptions	1
+	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"
+
+NS_A="ns-A"
+NS_B="ns-B"
+NS_R1="ns-R1"
+NS_R2="ns-R2"
 ns_a="ip netns exec ${NS_A}"
 ns_b="ip netns exec ${NS_B}"
 ns_r1="ip netns exec ${NS_R1}"
@@ -194,6 +205,30 @@ routes="
 	B	default			${prefix6}:${b_r1}::2
 "
 
+USE_NH="no"
+#	ns	family	nh id	   destination		gateway
+nexthops="
+	A	4	41	${prefix4}.${a_r1}.2	veth_A-R1
+	A	4	42	${prefix4}.${a_r2}.2	veth_A-R2
+	B	4	41	${prefix4}.${b_r1}.2	veth_B-R1
+
+	A	6	61	${prefix6}:${a_r1}::2	veth_A-R1
+	A	6	62	${prefix6}:${a_r2}::2	veth_A-R2
+	B	6	61	${prefix6}:${b_r1}::2	veth_B-R1
+"
+
+# nexthop id correlates to id in nexthops config above
+#	ns    family	prefix			nh id
+routes_nh="
+	A	4	default			41
+	A	4	${prefix4}.${b_r2}.1	42
+	B	4	default			41
+
+	A	6	default			61
+	A	6	${prefix6}:${b_r2}::1	62
+	B	6	default			61
+"
+
 veth4_a_addr="192.168.1.1"
 veth4_b_addr="192.168.1.2"
 veth4_mask="24"
@@ -212,7 +247,6 @@ dummy6_0_prefix="fc00:1000::"
 dummy6_1_prefix="fc00:1001::"
 dummy6_mask="64"
 
-cleanup_done=1
 err_buf=
 tcpdump_pids=
 
@@ -449,6 +483,50 @@ setup_xfrm6() {
 	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
 }
 
+setup_routing_old() {
+	for i in ${routes}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
+		[ "${gw}" = "" ]	&& gw="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} route add ${addr} via ${gw}
+
+		ns=""; addr=""; gw=""
+	done
+}
+
+setup_routing_new() {
+	for i in ${nexthops}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
+		[ "${nhid}" = "" ]	&& nhid="${i}"		&& continue
+		[ "${gw}" = "" ]	&& gw="${i}"		&& continue
+		[ "${dev}" = "" ]	&& dev="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}
+
+		ns=""; fam=""; nhid=""; gw=""; dev=""
+
+	done
+
+	for i in ${routes_nh}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
+		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
+		[ "${nhid}" = "" ]	&& nhid="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
+
+		ns=""; fam=""; addr=""; nhid=""
+	done
+}
+
 setup_routing() {
 	for i in ${NS_R1} ${NS_R2}; do
 		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
@@ -479,23 +557,19 @@ setup_routing() {
 		ns=""; peer=""; segment=""
 	done
 
-	for i in ${routes}; do
-		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
-		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
-		[ "${gw}" = "" ]	&& gw="${i}"
-
-		ns_name="$(nsname ${ns})"
-
-		ip -n ${ns_name} route add ${addr} via ${gw}
+	if [ "$USE_NH" = "yes" ]; then
+		setup_routing_new
+	else
+		setup_routing_old
+	fi
 
-		ns=""; addr=""; gw=""
-	done
+	return 0
 }
 
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
-	cleanup_done=0
+	cleanup
 	for arg do
 		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
 	done
@@ -519,11 +593,9 @@ cleanup() {
 	done
 	tcpdump_pids=
 
-	[ ${cleanup_done} -eq 1 ] && return
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
 	done
-	cleanup_done=1
 }
 
 mtu() {
@@ -1093,6 +1165,158 @@ test_cleanup_ipv4_exception() {
 	test_cleanup_vxlanX_exception 4
 }
 
+run_test() {
+	(
+	tname="$1"
+	tdesc="$2"
+
+	unset IFS
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "\n##########################################################################\n\n"
+	fi
+
+	eval test_${tname}
+	ret=$?
+
+	if [ $ret -eq 0 ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${tdesc}"
+	elif [ $ret -eq 1 ]; then
+		printf "TEST: %-60s  [FAIL]\n" "${tdesc}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "Pausing. Hit enter to continue"
+			read a
+		fi
+		err_flush
+		exit 1
+	elif [ $ret -eq 2 ]; then
+		printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
+		err_flush
+	fi
+
+	return $ret
+	)
+	ret=$?
+	[ $ret -ne 0 ] && exitcode=1
+
+	return $ret
+}
+
+run_test_nh() {
+	tname="$1"
+	tdesc="$2"
+
+	USE_NH=yes
+	run_test "${tname}" "${tdesc} - nexthop objects"
+	USE_NH=no
+}
+
+test_list_flush_ipv4_exception() {
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	dst_prefix1="${prefix4}.${b_r1}."
+	dst2="${prefix4}.${b_r2}.1"
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1500
+	mtu "${ns_b}"  veth_B-R1 1500
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	fail=0
+
+	# Add 100 addresses for veth endpoint on B reached by default A route
+	for i in $(seq 100 199); do
+		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
+	done
+
+	# Create 100 cached route exceptions for path via R1, one via R2. Note
+	# that with IPv4 we need to actually cause a route lookup that matches
+	# the exception caused by ICMP, in order to actually have a cached
+	# route, so we need to ping each destination twice
+	for i in $(seq 100 199); do
+		run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
+	done
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"
+
+	# Each exception is printed as two lines
+	if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then
+		err "  can't list cached exceptions"
+		fail=1
+	fi
+
+	run_cmd ${ns_a} ip route flush cache
+	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}1)"
+	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}2)"
+	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
+	   [ -n "$(${ns_a} ip route list cache)" ]; then
+		err "  can't flush cached exceptions"
+		fail=1
+	fi
+
+	return ${fail}
+}
+
+test_list_flush_ipv6_exception() {
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	dst_prefix1="${prefix6}:${b_r1}::"
+	dst2="${prefix6}:${b_r2}::1"
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1500
+	mtu "${ns_b}"  veth_B-R1 1500
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	fail=0
+
+	# Add 100 addresses for veth endpoint on B reached by default A route
+	for i in $(seq 100 199); do
+		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
+	done
+
+	# Create 100 cached route exceptions for path via R1, one via R2
+	for i in $(seq 100 199); do
+		run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
+	done
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
+	if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then
+		err "  can't list cached exceptions"
+		fail=1
+	fi
+
+	run_cmd ${ns_a} ip -6 route flush cache
+	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
+	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
+	   [ -n "$(${ns_a} ip -6 route list cache)" ]; then
+		err "  can't flush cached exceptions"
+		fail=1
+	fi
+
+	return ${fail}
+}
+
 usage() {
 	echo
 	echo "$0 [OPTIONS] [TEST]..."
@@ -1136,8 +1360,23 @@ done
 
 trap cleanup EXIT
 
+# start clean
+cleanup
+
+HAVE_NH=no
+ip nexthop ls >/dev/null 2>&1
+[ $? -eq 0 ] && HAVE_NH=yes
+
+name=""
+desc=""
+rerun_nh=0
 for t in ${tests}; do
-	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+	[ "${name}" = "" ]	&& name="${t}"	&& continue
+	[ "${desc}" = "" ]	&& desc="${t}"	&& continue
+
+	if [ "${HAVE_NH}" = "yes" ]; then
+		rerun_nh="${t}"
+	fi
 
 	run_this=1
 	for arg do
@@ -1145,36 +1384,18 @@ for t in ${tests}; do
 		[ "${arg}" = "${name}" ] && run_this=1 && break
 		run_this=0
 	done
-	[ $run_this -eq 0 ] && continue
-
-	(
-		unset IFS
+	if [ $run_this -eq 1 ]; then
+		run_test "${name}" "${desc}"
+		# if test was skipped no need to retry with nexthop objects
+		[ $? -eq 2 ] && rerun_nh=0
 
-		if [ "$VERBOSE" = "1" ]; then
-			printf "\n##########################################################################\n\n"
+		if [ "${rerun_nh}" = "1" ]; then
+			run_test_nh "${name}" "${desc}"
 		fi
-
-		eval test_${name}
-		ret=$?
-		cleanup
-
-		if [ $ret -eq 0 ]; then
-			printf "TEST: %-60s  [ OK ]\n" "${t}"
-		elif [ $ret -eq 1 ]; then
-			printf "TEST: %-60s  [FAIL]\n" "${t}"
-			if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
-				echo
-				echo "Pausing. Hit enter to continue"
-				read a
-			fi
-			err_flush
-			exit 1
-		elif [ $ret -eq 2 ]; then
-			printf "TEST: %-60s  [SKIP]\n" "${t}"
-			err_flush
-		fi
-	)
-	[ $? -ne 0 ] && exitcode=1
+	fi
+	name=""
+	desc=""
+	rerun_nh=0
 done
 
 exit ${exitcode}
diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh
new file mode 100755
index 000000000000..116bfeab72fa
--- /dev/null
+++ b/tools/testing/selftests/net/route_localnet.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a couple of tests when route_localnet = 1.
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+setup() {
+    ip netns add "${PEER_NS}"
+    ip -netns "${PEER_NS}" link set dev lo up
+    ip link add name veth0 type veth peer name veth1
+    ip link set dev veth0 up
+    ip link set dev veth1 netns "${PEER_NS}"
+
+    # Enable route_localnet and delete useless route 127.0.0.0/8.
+    sysctl -w net.ipv4.conf.veth0.route_localnet=1
+    ip netns exec "${PEER_NS}" sysctl -w net.ipv4.conf.veth1.route_localnet=1
+    ip route del 127.0.0.0/8 dev lo table local
+    ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local
+
+    ifconfig veth0 127.25.3.4/24 up
+    ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up
+
+    ip route flush cache
+    ip netns exec "${PEER_NS}" ip route flush cache
+}
+
+cleanup() {
+    ip link del veth0
+    ip route add local 127.0.0.0/8 dev lo proto kernel scope host src 127.0.0.1
+    local -r ns="$(ip netns list|grep $PEER_NS)"
+    [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+
+# Run test when arp_announce = 2.
+run_arp_announce_test() {
+    echo "run arp_announce test"
+    setup
+
+    sysctl -w net.ipv4.conf.veth0.arp_announce=2
+    ip netns exec "${PEER_NS}" sysctl -w net.ipv4.conf.veth1.arp_announce=2
+    ping -c5 -I veth0 127.25.3.14
+    if [ $? -ne 0 ];then
+        echo "failed"
+    else
+        echo "ok"
+    fi
+
+    cleanup
+}
+
+# Run test when arp_ignore = 3.
+run_arp_ignore_test() {
+    echo "run arp_ignore test"
+    setup
+
+    sysctl -w net.ipv4.conf.veth0.arp_ignore=3
+    ip netns exec "${PEER_NS}" sysctl -w net.ipv4.conf.veth1.arp_ignore=3
+    ping -c5 -I veth0 127.25.3.14
+    if [ $? -ne 0 ];then
+        echo "failed"
+    else
+        echo "ok"
+    fi
+
+    cleanup
+}
+
+run_all_tests() {
+    run_arp_announce_test
+    run_arp_ignore_test
+}
+
+run_all_tests
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index b25c9fe019d2..bdbf4b3125b6 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -249,6 +249,45 @@ kci_test_route_get()
 	echo "PASS: route get"
 }
 
+kci_test_addrlft()
+{
+	for i in $(seq 10 100) ;do
+		lft=$(((RANDOM%3) + 1))
+		ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
+		check_err $?
+	done
+
+	sleep 5
+
+	ip addr show dev "$devdummy" | grep "10.23.11."
+	if [ $? -eq 0 ]; then
+		echo "FAIL: preferred_lft addresses remaining"
+		check_err 1
+		return
+	fi
+
+	echo "PASS: preferred_lft addresses have expired"
+}
+
+kci_test_promote_secondaries()
+{
+	promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
+
+	sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
+
+	for i in $(seq 2 254);do
+		IP="10.23.11.$i"
+		ip -f inet addr add $IP/16 brd + dev "$devdummy"
+		ifconfig "$devdummy" $IP netmask 255.255.0.0
+	done
+
+	ip addr flush dev "$devdummy"
+
+	[ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0
+
+	echo "PASS: promote_secondaries complete"
+}
+
 kci_test_addrlabel()
 {
 	ret=0
@@ -699,13 +738,17 @@ kci_test_ipsec_offload()
 	sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
 	sysfsf=$sysfsd/ipsec
 	sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+	probed=false
 
 	# setup netdevsim since dummydev doesn't have offload support
-	modprobe netdevsim
-	check_err $?
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec_offload can't load netdevsim"
-		return 1
+	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+		modprobe -q netdevsim
+		check_err $?
+		if [ $ret -ne 0 ]; then
+			echo "SKIP: ipsec_offload can't load netdevsim"
+			return $ksft_skip
+		fi
+		probed=true
 	fi
 
 	echo "0" > /sys/bus/netdevsim/new_device
@@ -785,7 +828,7 @@ EOF
 	fi
 
 	# clean up any leftovers
-	rmmod netdevsim
+	$probed && rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
 		echo "FAIL: ipsec_offload"
@@ -1140,6 +1183,8 @@ kci_test_rtnl()
 
 	kci_test_polrouting
 	kci_test_route_get
+	kci_test_addrlft
+	kci_test_promote_secondaries
 	kci_test_tc
 	kci_test_gre
 	kci_test_gretap
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
new file mode 100644
index 000000000000..53f598f06647
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test the SO_TXTIME API
+ *
+ * Takes two streams of { payload, delivery time }[], one input and one output.
+ * Sends the input stream and verifies arrival matches the output stream.
+ * The two streams can differ due to out-of-order delivery and drops.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/net_tstamp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+static int	cfg_clockid	= CLOCK_TAI;
+static bool	cfg_do_ipv4;
+static bool	cfg_do_ipv6;
+static uint16_t	cfg_port	= 8000;
+static int	cfg_variance_us	= 2000;
+
+static uint64_t glob_tstart;
+
+/* encode one timed transmission (of a 1B payload) */
+struct timed_send {
+	char	data;
+	int64_t	delay_us;
+};
+
+#define MAX_NUM_PKT	8
+static struct timed_send cfg_in[MAX_NUM_PKT];
+static struct timed_send cfg_out[MAX_NUM_PKT];
+static int cfg_num_pkt;
+
+static uint64_t gettime_ns(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(cfg_clockid, &ts))
+		error(1, errno, "gettime");
+
+	return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
+}
+
+static void do_send_one(int fdt, struct timed_send *ts)
+{
+	char control[CMSG_SPACE(sizeof(uint64_t))];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+	uint64_t tdeliver;
+	int ret;
+
+	iov.iov_base = &ts->data;
+	iov.iov_len = 1;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	if (ts->delay_us >= 0) {
+		memset(control, 0, sizeof(control));
+		msg.msg_control = &control;
+		msg.msg_controllen = sizeof(control);
+
+		tdeliver = glob_tstart + ts->delay_us * 1000;
+
+		cm = CMSG_FIRSTHDR(&msg);
+		cm->cmsg_level = SOL_SOCKET;
+		cm->cmsg_type = SCM_TXTIME;
+		cm->cmsg_len = CMSG_LEN(sizeof(tdeliver));
+		memcpy(CMSG_DATA(cm), &tdeliver, sizeof(tdeliver));
+	}
+
+	ret = sendmsg(fdt, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret == 0)
+		error(1, 0, "write: 0B");
+
+}
+
+static void do_recv_one(int fdr, struct timed_send *ts)
+{
+	int64_t tstop, texpect;
+	char rbuf[2];
+	int ret;
+
+	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+	if (ret == -1)
+		error(1, errno, "read");
+	if (ret != 1)
+		error(1, 0, "read: %dB", ret);
+
+	tstop = (gettime_ns() - glob_tstart) / 1000;
+	texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
+
+	fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n",
+			rbuf[0], tstop, texpect);
+
+	if (rbuf[0] != ts->data)
+		error(1, 0, "payload mismatch. expected %c", ts->data);
+
+	if (labs(tstop - texpect) > cfg_variance_us)
+		error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+}
+
+static void do_recv_verify_empty(int fdr)
+{
+	char rbuf[1];
+	int ret;
+
+	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+	if (ret != -1 || errno != EAGAIN)
+		error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
+}
+
+static void setsockopt_txtime(int fd)
+{
+	struct sock_txtime so_txtime_val = { .clockid = cfg_clockid };
+	struct sock_txtime so_txtime_val_read = { 0 };
+	socklen_t vallen = sizeof(so_txtime_val);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+		       &so_txtime_val, sizeof(so_txtime_val)))
+		error(1, errno, "setsockopt txtime");
+
+	if (getsockopt(fd, SOL_SOCKET, SO_TXTIME,
+		       &so_txtime_val_read, &vallen))
+		error(1, errno, "getsockopt txtime");
+
+	if (vallen != sizeof(so_txtime_val) ||
+	    memcmp(&so_txtime_val, &so_txtime_val_read, vallen))
+		error(1, 0, "getsockopt txtime: mismatch");
+}
+
+static int setup_tx(struct sockaddr *addr, socklen_t alen)
+{
+	int fd;
+
+	fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	if (connect(fd, addr, alen))
+		error(1, errno, "connect");
+
+	setsockopt_txtime(fd);
+
+	return fd;
+}
+
+static int setup_rx(struct sockaddr *addr, socklen_t alen)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fd;
+
+	fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (bind(fd, addr, alen))
+		error(1, errno, "bind");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	return fd;
+}
+
+static void do_test(struct sockaddr *addr, socklen_t alen)
+{
+	int fdt, fdr, i;
+
+	fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
+			addr->sa_family == PF_INET ? '4' : '6',
+			cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
+
+	fdt = setup_tx(addr, alen);
+	fdr = setup_rx(addr, alen);
+
+	glob_tstart = gettime_ns();
+
+	for (i = 0; i < cfg_num_pkt; i++)
+		do_send_one(fdt, &cfg_in[i]);
+	for (i = 0; i < cfg_num_pkt; i++)
+		do_recv_one(fdr, &cfg_out[i]);
+
+	do_recv_verify_empty(fdr);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdt))
+		error(1, errno, "close t");
+}
+
+static int parse_io(const char *optarg, struct timed_send *array)
+{
+	char *arg, *tok;
+	int aoff = 0;
+
+	arg = strdup(optarg);
+	if (!arg)
+		error(1, errno, "strdup");
+
+	while ((tok = strtok(arg, ","))) {
+		arg = NULL;	/* only pass non-zero on first call */
+
+		if (aoff / 2 == MAX_NUM_PKT)
+			error(1, 0, "exceeds max pkt count (%d)", MAX_NUM_PKT);
+
+		if (aoff & 1) {	/* parse delay */
+			array->delay_us = strtol(tok, NULL, 0) * 1000;
+			array++;
+		} else {	/* parse character */
+			array->data = tok[0];
+		}
+
+		aoff++;
+	}
+
+	free(arg);
+
+	return aoff / 2;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c, ilen, olen;
+
+	while ((c = getopt(argc, argv, "46c:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'c':
+			if (!strcmp(optarg, "tai"))
+				cfg_clockid = CLOCK_TAI;
+			else if (!strcmp(optarg, "monotonic") ||
+				 !strcmp(optarg, "mono"))
+				cfg_clockid = CLOCK_MONOTONIC;
+			else
+				error(1, 0, "unknown clock id %s", optarg);
+			break;
+		default:
+			error(1, 0, "parse error at %d", optind);
+		}
+	}
+
+	if (argc - optind != 2)
+		error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+
+	ilen = parse_io(argv[optind], cfg_in);
+	olen = parse_io(argv[optind + 1], cfg_out);
+	if (ilen != olen)
+		error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
+	cfg_num_pkt = ilen;
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_do_ipv6) {
+		struct sockaddr_in6 addr6 = {0};
+
+		addr6.sin6_family = AF_INET6;
+		addr6.sin6_port = htons(cfg_port);
+		addr6.sin6_addr = in6addr_loopback;
+		do_test((void *)&addr6, sizeof(addr6));
+	}
+
+	if (cfg_do_ipv4) {
+		struct sockaddr_in addr4 = {0};
+
+		addr4.sin_family = AF_INET;
+		addr4.sin_port = htons(cfg_port);
+		addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		do_test((void *)&addr4, sizeof(addr4));
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
new file mode 100755
index 000000000000..5aa519328a5b
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for the SO_TXTIME interface
+
+# Run in network namespace
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh $0 __subprocess
+	exit $?
+fi
+
+set -e
+
+tc qdisc add dev lo root fq
+./so_txtime -4 -6 -c mono a,-1 a,-1
+./so_txtime -4 -6 -c mono a,0 a,0
+./so_txtime -4 -6 -c mono a,10 a,10
+./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
+./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
+
+if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then
+	! ./so_txtime -4 -6 -c tai a,-1 a,-1
+	! ./so_txtime -4 -6 -c tai a,0 a,0
+	./so_txtime -4 -6 -c tai a,10 a,10
+	./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
+	./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+else
+	echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+fi
+
+echo OK. All tests passed
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
new file mode 100644
index 000000000000..9c55ec44fc43
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test key rotation for TFO.
+ * New keys are 'rotated' in two steps:
+ * 1) Add new key as the 'backup' key 'behind' the primary key
+ * 2) Make new key the primary by swapping the backup and primary keys
+ *
+ * The rotation is done in stages using multiple sockets bound
+ * to the same port via SO_REUSEPORT. This simulates key rotation
+ * behind say a load balancer. We verify that across the rotation
+ * there are no cases in which a cookie is not accepted by verifying
+ * that TcpExtTCPFastOpenPassiveFail remains 0.
+ */
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <netinet/tcp.h>
+#include <fcntl.h>
+#include <time.h>
+
+#ifndef TCP_FASTOPEN_KEY
+#define TCP_FASTOPEN_KEY 33
+#endif
+
+#define N_LISTEN 10
+#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
+#define KEY_LENGTH 16
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+static bool do_ipv6;
+static bool do_sockopt;
+static bool do_rotate;
+static int key_len = KEY_LENGTH;
+static int rcv_fds[N_LISTEN];
+static int proc_fd;
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const int PORT = 8891;
+
+static void get_keys(int fd, uint32_t *keys)
+{
+	char buf[128];
+	socklen_t len = KEY_LENGTH * 2;
+
+	if (do_sockopt) {
+		if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len))
+			error(1, errno, "Unable to get key");
+		return;
+	}
+	lseek(proc_fd, 0, SEEK_SET);
+	if (read(proc_fd, buf, sizeof(buf)) <= 0)
+		error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY);
+	if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2,
+	    keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8)
+		error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY);
+}
+
+static void set_keys(int fd, uint32_t *keys)
+{
+	char buf[128];
+
+	if (do_sockopt) {
+		if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys,
+		    key_len))
+			error(1, errno, "Unable to set key");
+		return;
+	}
+	if (do_rotate)
+		snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x",
+			 keys[0], keys[1], keys[2], keys[3], keys[4], keys[5],
+			 keys[6], keys[7]);
+	else
+		snprintf(buf, 128, "%08x-%08x-%08x-%08x",
+			 keys[0], keys[1], keys[2], keys[3]);
+	lseek(proc_fd, 0, SEEK_SET);
+	if (write(proc_fd, buf, sizeof(buf)) <= 0)
+		error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY);
+}
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds)
+{
+	struct sockaddr_in  addr4 = {0};
+	struct sockaddr_in6 addr6 = {0};
+	struct sockaddr *addr;
+	int opt = 1, i, sz;
+	int qlen = 100;
+	uint32_t keys[8];
+
+	switch (family) {
+	case AF_INET:
+		addr4.sin_family = family;
+		addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		addr4.sin_port = htons(PORT);
+		sz = sizeof(addr4);
+		addr = (struct sockaddr *)&addr4;
+		break;
+	case AF_INET6:
+		addr6.sin6_family = AF_INET6;
+		addr6.sin6_addr = in6addr_any;
+		addr6.sin6_port = htons(PORT);
+		sz = sizeof(addr6);
+		addr = (struct sockaddr *)&addr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return;
+	}
+	for (i = 0; i < ARRAY_SIZE(keys); i++)
+		keys[i] = rand();
+	for (i = 0; i < N_LISTEN; i++) {
+		rcv_fds[i] = socket(family, proto, 0);
+		if (rcv_fds[i] < 0)
+			error(1, errno, "failed to create receive socket");
+		if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+		if (bind(rcv_fds[i], addr, sz))
+			error(1, errno, "failed to bind receive socket");
+		if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen,
+			       sizeof(qlen)))
+			error(1, errno, "failed to set TCP_FASTOPEN");
+		set_keys(rcv_fds[i], keys);
+		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+			error(1, errno, "failed to listen on receive port");
+	}
+}
+
+static int connect_and_send(int family, int proto)
+{
+	struct sockaddr_in  saddr4 = {0};
+	struct sockaddr_in  daddr4 = {0};
+	struct sockaddr_in6 saddr6 = {0};
+	struct sockaddr_in6 daddr6 = {0};
+	struct sockaddr *saddr, *daddr;
+	int fd, sz, ret;
+	char data[1];
+
+	switch (family) {
+	case AF_INET:
+		saddr4.sin_family = AF_INET;
+		saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		saddr4.sin_port = 0;
+
+		daddr4.sin_family = AF_INET;
+		if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+			error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+		daddr4.sin_port = htons(PORT);
+
+		sz = sizeof(saddr4);
+		saddr = (struct sockaddr *)&saddr4;
+		daddr = (struct sockaddr *)&daddr4;
+		break;
+	case AF_INET6:
+		saddr6.sin6_family = AF_INET6;
+		saddr6.sin6_addr = in6addr_any;
+
+		daddr6.sin6_family = AF_INET6;
+		if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+			error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+		daddr6.sin6_port = htons(PORT);
+
+		sz = sizeof(saddr6);
+		saddr = (struct sockaddr *)&saddr6;
+		daddr = (struct sockaddr *)&daddr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, daddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return -1;
+	}
+	fd = socket(family, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+	if (bind(fd, saddr, sz))
+		error(1, errno, "failed to bind send socket");
+	data[0] = 'a';
+	ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz);
+	if (ret != 1)
+		error(1, errno, "failed to sendto");
+
+	return fd;
+}
+
+static bool is_listen_fd(int fd)
+{
+	int i;
+
+	for (i = 0; i < N_LISTEN; i++) {
+		if (rcv_fds[i] == fd)
+			return true;
+	}
+	return false;
+}
+
+static void rotate_key(int fd)
+{
+	static int iter;
+	static uint32_t new_key[4];
+	uint32_t keys[8];
+	uint32_t tmp_key[4];
+	int i;
+
+	if (iter < N_LISTEN) {
+		/* first set new key as backups */
+		if (iter == 0) {
+			for (i = 0; i < ARRAY_SIZE(new_key); i++)
+				new_key[i] = rand();
+		}
+		get_keys(fd, keys);
+		memcpy(keys + 4, new_key, KEY_LENGTH);
+		set_keys(fd, keys);
+	} else {
+		/* swap the keys */
+		get_keys(fd, keys);
+		memcpy(tmp_key, keys + 4, KEY_LENGTH);
+		memcpy(keys + 4, keys, KEY_LENGTH);
+		memcpy(keys, tmp_key, KEY_LENGTH);
+		set_keys(fd, keys);
+	}
+	if (++iter >= (N_LISTEN * 2))
+		iter = 0;
+}
+
+static void run_one_test(int family)
+{
+	struct epoll_event ev;
+	int i, send_fd;
+	int n_loops = 10000;
+	int rotate_key_fd = 0;
+	int key_rotate_interval = 50;
+	int fd, epfd;
+	char buf[1];
+
+	build_rcv_fd(family, SOCK_STREAM, rcv_fds);
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+	ev.events = EPOLLIN;
+	for (i = 0; i < N_LISTEN; i++) {
+		ev.data.fd = rcv_fds[i];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+	while (n_loops--) {
+		send_fd = connect_and_send(family, SOCK_STREAM);
+		if (do_rotate && ((n_loops % key_rotate_interval) == 0)) {
+			rotate_key(rcv_fds[rotate_key_fd]);
+			if (++rotate_key_fd >= N_LISTEN)
+				rotate_key_fd = 0;
+		}
+		while (1) {
+			i = epoll_wait(epfd, &ev, 1, -1);
+			if (i < 0)
+				error(1, errno, "epoll_wait failed");
+			if (is_listen_fd(ev.data.fd)) {
+				fd = accept(ev.data.fd, NULL, NULL);
+				if (fd < 0)
+					error(1, errno, "failed to accept");
+				ev.data.fd = fd;
+				if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+					error(1, errno, "failed epoll add");
+				continue;
+			}
+			i = recv(ev.data.fd, buf, sizeof(buf), 0);
+			if (i != 1)
+				error(1, errno, "failed recv data");
+			if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL))
+				error(1, errno, "failed epoll del");
+			close(ev.data.fd);
+			break;
+		}
+		close(send_fd);
+	}
+	for (i = 0; i < N_LISTEN; i++)
+		close(rcv_fds[i]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "46sr")) != -1) {
+		switch (c) {
+		case '4':
+			do_ipv6 = false;
+			break;
+		case '6':
+			do_ipv6 = true;
+			break;
+		case 's':
+			do_sockopt = true;
+			break;
+		case 'r':
+			do_rotate = true;
+			key_len = KEY_LENGTH * 2;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR);
+	if (proc_fd < 0)
+		error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY);
+	srand(time(NULL));
+	if (do_ipv6)
+		run_one_test(AF_INET6);
+	else
+		run_one_test(AF_INET);
+	close(proc_fd);
+	fprintf(stderr, "PASS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
new file mode 100755
index 000000000000..41476399e184
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# rotate TFO keys for ipv4/ipv6 and verify that the client does
+# not present an invalid cookie.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+	ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \
+		>/dev/null 2>&1
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+	# flush routes before each run, otherwise successive runs can
+	# initially present an old TFO cookie
+	ip netns exec "${NETNS}" ip tcp_metrics flush
+	ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1"
+	val=$(ip netns exec "${NETNS}" nstat -az | \
+		grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}')
+	if [ $val -ne 0 ]; then
+		echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero"
+		return 1
+	fi
+}
+
+do_test "-4"
+do_test "-6"
+do_test "-4"
+do_test "-6"
+do_test "-4s"
+do_test "-6s"
+do_test "-4s"
+do_test "-6s"
+do_test "-4r"
+do_test "-6r"
+do_test "-4r"
+do_test "-6r"
+do_test "-4sr"
+do_test "-6sr"
+do_test "-4sr"
+do_test "-6sr"
+echo "all tests done"
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
new file mode 100755
index 000000000000..3119b80e711f
--- /dev/null
+++ b/tools/testing/selftests/net/test_blackhole_dev.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs blackhole-dev test using blackhole-dev kernel module
+
+if /sbin/modprobe -q test_blackhole_dev ; then
+	/sbin/modprobe -q -r test_blackhole_dev;
+	echo "test_blackhole_dev: ok";
+else
+	echo "test_blackhole_dev: [FAIL]";
+	exit 1;
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 5670a9ffd8eb..80b5d352702e 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -3,6 +3,48 @@
 #
 # Run a series of udpgso benchmarks
 
+readonly GREEN='\033[0;92m'
+readonly YELLOW='\033[0;33m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m' # No Color
+
+readonly KSFT_PASS=0
+readonly KSFT_FAIL=1
+readonly KSFT_SKIP=4
+
+num_pass=0
+num_err=0
+num_skip=0
+
+kselftest_test_exitcode() {
+	local -r exitcode=$1
+
+	if [[ ${exitcode} -eq ${KSFT_PASS} ]]; then
+		num_pass=$(( $num_pass + 1 ))
+	elif [[ ${exitcode} -eq ${KSFT_SKIP} ]]; then
+		num_skip=$(( $num_skip + 1 ))
+	else
+		num_err=$(( $num_err + 1 ))
+	fi
+}
+
+kselftest_exit() {
+	echo -e "$(basename $0): PASS=${num_pass} SKIP=${num_skip} FAIL=${num_err}"
+
+	if [[ $num_err -ne 0 ]]; then
+		echo -e "$(basename $0): ${RED}FAIL${NC}"
+		exit ${KSFT_FAIL}
+	fi
+
+	if [[ $num_skip -ne 0 ]]; then
+		echo -e "$(basename $0): ${YELLOW}SKIP${NC}"
+		exit ${KSFT_SKIP}
+	fi
+
+	echo -e "$(basename $0): ${GREEN}PASS${NC}"
+	exit ${KSFT_PASS}
+}
+
 wake_children() {
 	local -r jobs="$(jobs -p)"
 
@@ -25,6 +67,7 @@ run_in_netns() {
 	local -r args=$@
 
 	./in_netns.sh $0 __subprocess ${args}
+	kselftest_test_exitcode $?
 }
 
 run_udp() {
@@ -38,6 +81,18 @@ run_udp() {
 
 	echo "udp gso zerocopy"
 	run_in_netns ${args} -S 0 -z
+
+	echo "udp gso timestamp"
+	run_in_netns ${args} -S 0 -T
+
+	echo "udp gso zerocopy audit"
+	run_in_netns ${args} -S 0 -z -a
+
+	echo "udp gso timestamp audit"
+	run_in_netns ${args} -S 0 -T -a
+
+	echo "udp gso zerocopy timestamp audit"
+	run_in_netns ${args} -S 0 -T -z -a
 }
 
 run_tcp() {
@@ -48,10 +103,15 @@ run_tcp() {
 
 	echo "tcp zerocopy"
 	run_in_netns ${args} -t -z
+
+	# excluding for now because test fails intermittently
+	# add -P option to include poll() to reduce possibility of lost messages
+	#echo "tcp zerocopy audit"
+	#run_in_netns ${args} -t -z -P -a
 }
 
 run_all() {
-	local -r core_args="-l 4"
+	local -r core_args="-l 3"
 	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
 	local -r ipv6_args="${core_args} -6 -D ::1"
 
@@ -66,6 +126,7 @@ run_all() {
 
 if [[ $# -eq 0 ]]; then
 	run_all
+	kselftest_exit
 elif [[ $1 == "__subprocess" ]]; then
 	shift
 	run_one $@
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 4074538b5df5..ada99496634a 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -5,6 +5,8 @@
 #include <arpa/inet.h>
 #include <errno.h>
 #include <error.h>
+#include <linux/errqueue.h>
+#include <linux/net_tstamp.h>
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
@@ -19,9 +21,12 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <sys/time.h>
+#include <sys/poll.h>
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "../kselftest.h"
+
 #ifndef ETH_MAX_MTU
 #define ETH_MAX_MTU 0xFFFFU
 #endif
@@ -34,10 +39,18 @@
 #define SO_ZEROCOPY	60
 #endif
 
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY 5
+#endif
+
 #ifndef MSG_ZEROCOPY
 #define MSG_ZEROCOPY	0x4000000
 #endif
 
+#ifndef ENOTSUPP
+#define ENOTSUPP	524
+#endif
+
 #define NUM_PKT		100
 
 static bool	cfg_cache_trash;
@@ -48,12 +61,24 @@ static uint16_t	cfg_mss;
 static int	cfg_payload_len	= (1472 * 42);
 static int	cfg_port	= 8000;
 static int	cfg_runtime_ms	= -1;
+static bool	cfg_poll;
 static bool	cfg_segment;
 static bool	cfg_sendmmsg;
 static bool	cfg_tcp;
+static uint32_t	cfg_tx_ts = SOF_TIMESTAMPING_TX_SOFTWARE;
+static bool	cfg_tx_tstamp;
+static bool	cfg_audit;
+static bool	cfg_verbose;
 static bool	cfg_zerocopy;
 static int	cfg_msg_nr;
 static uint16_t	cfg_gso_size;
+static unsigned long total_num_msgs;
+static unsigned long total_num_sends;
+static unsigned long stat_tx_ts;
+static unsigned long stat_tx_ts_errors;
+static unsigned long tstart;
+static unsigned long tend;
+static unsigned long stat_zcopies;
 
 static socklen_t cfg_alen;
 static struct sockaddr_storage cfg_dst_addr;
@@ -110,23 +135,125 @@ static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
 	}
 }
 
-static void flush_zerocopy(int fd)
+static void flush_cmsg(struct cmsghdr *cmsg)
+{
+	struct sock_extended_err *err;
+	struct scm_timestamping *tss;
+	__u32 lo;
+	__u32 hi;
+	int i;
+
+	switch (cmsg->cmsg_level) {
+	case SOL_SOCKET:
+		if (cmsg->cmsg_type == SO_TIMESTAMPING) {
+			i = (cfg_tx_ts == SOF_TIMESTAMPING_TX_HARDWARE) ? 2 : 0;
+			tss = (struct scm_timestamping *)CMSG_DATA(cmsg);
+			if (tss->ts[i].tv_sec == 0)
+				stat_tx_ts_errors++;
+		} else {
+			error(1, 0, "unknown SOL_SOCKET cmsg type=%u\n",
+			      cmsg->cmsg_type);
+		}
+		break;
+	case SOL_IP:
+	case SOL_IPV6:
+		switch (cmsg->cmsg_type) {
+		case IP_RECVERR:
+		case IPV6_RECVERR:
+		{
+			err = (struct sock_extended_err *)CMSG_DATA(cmsg);
+			switch (err->ee_origin) {
+			case SO_EE_ORIGIN_TIMESTAMPING:
+				/* Got a TX timestamp from error queue */
+				stat_tx_ts++;
+				break;
+			case SO_EE_ORIGIN_ICMP:
+			case SO_EE_ORIGIN_ICMP6:
+				if (cfg_verbose)
+					fprintf(stderr,
+						"received ICMP error: type=%u, code=%u\n",
+						err->ee_type, err->ee_code);
+				break;
+			case SO_EE_ORIGIN_ZEROCOPY:
+			{
+				lo = err->ee_info;
+				hi = err->ee_data;
+				/* range of IDs acknowledged */
+				stat_zcopies += hi - lo + 1;
+				break;
+			}
+			case SO_EE_ORIGIN_LOCAL:
+				if (cfg_verbose)
+					fprintf(stderr,
+						"received packet with local origin: %u\n",
+						err->ee_origin);
+				break;
+			default:
+				error(0, 1, "received packet with origin: %u",
+				      err->ee_origin);
+			}
+			break;
+		}
+		default:
+			error(0, 1, "unknown IP msg type=%u\n",
+			      cmsg->cmsg_type);
+			break;
+		}
+		break;
+	default:
+		error(0, 1, "unknown cmsg level=%u\n",
+		      cmsg->cmsg_level);
+	}
+}
+
+static void flush_errqueue_recv(int fd)
 {
-	struct msghdr msg = {0};	/* flush */
+	char control[CMSG_SPACE(sizeof(struct scm_timestamping)) +
+		     CMSG_SPACE(sizeof(struct sock_extended_err)) +
+		     CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
+	struct msghdr msg = {0};
+	struct cmsghdr *cmsg;
 	int ret;
 
 	while (1) {
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
 		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
 		if (ret == -1 && errno == EAGAIN)
 			break;
 		if (ret == -1)
 			error(1, errno, "errqueue");
-		if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+		if (msg.msg_flags != MSG_ERRQUEUE)
 			error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+		if (cfg_audit) {
+			for (cmsg = CMSG_FIRSTHDR(&msg);
+					cmsg;
+					cmsg = CMSG_NXTHDR(&msg, cmsg))
+				flush_cmsg(cmsg);
+		}
 		msg.msg_flags = 0;
 	}
 }
 
+static void flush_errqueue(int fd, const bool do_poll)
+{
+	if (do_poll) {
+		struct pollfd fds = {0};
+		int ret;
+
+		fds.fd = fd;
+		ret = poll(&fds, 1, 500);
+		if (ret == 0) {
+			if (cfg_verbose)
+				fprintf(stderr, "poll timeout\n");
+		} else if (ret < 0) {
+			error(1, errno, "poll");
+		}
+	}
+
+	flush_errqueue_recv(fd);
+}
+
 static int send_tcp(int fd, char *data)
 {
 	int ret, done = 0, count = 0;
@@ -168,16 +295,40 @@ static int send_udp(int fd, char *data)
 	return count;
 }
 
+static void send_ts_cmsg(struct cmsghdr *cm)
+{
+	uint32_t *valp;
+
+	cm->cmsg_level = SOL_SOCKET;
+	cm->cmsg_type = SO_TIMESTAMPING;
+	cm->cmsg_len = CMSG_LEN(sizeof(cfg_tx_ts));
+	valp = (void *)CMSG_DATA(cm);
+	*valp = cfg_tx_ts;
+}
+
 static int send_udp_sendmmsg(int fd, char *data)
 {
+	char control[CMSG_SPACE(sizeof(cfg_tx_ts))] = {0};
 	const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
 	struct mmsghdr mmsgs[max_nr_msg];
 	struct iovec iov[max_nr_msg];
 	unsigned int off = 0, left;
+	size_t msg_controllen = 0;
 	int i = 0, ret;
 
 	memset(mmsgs, 0, sizeof(mmsgs));
 
+	if (cfg_tx_tstamp) {
+		struct msghdr msg = {0};
+		struct cmsghdr *cmsg;
+
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msg);
+		send_ts_cmsg(cmsg);
+		msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts));
+	}
+
 	left = cfg_payload_len;
 	while (left) {
 		if (i == max_nr_msg)
@@ -189,6 +340,13 @@ static int send_udp_sendmmsg(int fd, char *data)
 		mmsgs[i].msg_hdr.msg_iov = iov + i;
 		mmsgs[i].msg_hdr.msg_iovlen = 1;
 
+		mmsgs[i].msg_hdr.msg_name = (void *)&cfg_dst_addr;
+		mmsgs[i].msg_hdr.msg_namelen = cfg_alen;
+		if (msg_controllen) {
+			mmsgs[i].msg_hdr.msg_control = control;
+			mmsgs[i].msg_hdr.msg_controllen = msg_controllen;
+		}
+
 		off += iov[i].iov_len;
 		left -= iov[i].iov_len;
 		i++;
@@ -214,9 +372,12 @@ static void send_udp_segment_cmsg(struct cmsghdr *cm)
 
 static int send_udp_segment(int fd, char *data)
 {
-	char control[CMSG_SPACE(sizeof(cfg_gso_size))] = {0};
+	char control[CMSG_SPACE(sizeof(cfg_gso_size)) +
+		     CMSG_SPACE(sizeof(cfg_tx_ts))] = {0};
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
+	size_t msg_controllen;
+	struct cmsghdr *cmsg;
 	int ret;
 
 	iov.iov_base = data;
@@ -227,8 +388,16 @@ static int send_udp_segment(int fd, char *data)
 
 	msg.msg_control = control;
 	msg.msg_controllen = sizeof(control);
-	send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+	cmsg = CMSG_FIRSTHDR(&msg);
+	send_udp_segment_cmsg(cmsg);
+	msg_controllen = CMSG_SPACE(sizeof(cfg_mss));
+	if (cfg_tx_tstamp) {
+		cmsg = CMSG_NXTHDR(&msg, cmsg);
+		send_ts_cmsg(cmsg);
+		msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts));
+	}
 
+	msg.msg_controllen = msg_controllen;
 	msg.msg_name = (void *)&cfg_dst_addr;
 	msg.msg_namelen = cfg_alen;
 
@@ -243,7 +412,7 @@ static int send_udp_segment(int fd, char *data)
 
 static void usage(const char *filepath)
 {
-	error(1, 0, "Usage: %s [-46cmtuz] [-C cpu] [-D dst ip] [-l secs] [-m messagenr] [-p port] [-s sendsize] [-S gsosize]",
+	error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
 		    filepath);
 }
 
@@ -252,7 +421,7 @@ static void parse_opts(int argc, char **argv)
 	int max_len, hdrlen;
 	int c;
 
-	while ((c = getopt(argc, argv, "46cC:D:l:mM:p:s:S:tuz")) != -1) {
+	while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) {
 		switch (c) {
 		case '4':
 			if (cfg_family != PF_UNSPEC)
@@ -266,6 +435,9 @@ static void parse_opts(int argc, char **argv)
 			cfg_family = PF_INET6;
 			cfg_alen = sizeof(struct sockaddr_in6);
 			break;
+		case 'a':
+			cfg_audit = true;
+			break;
 		case 'c':
 			cfg_cache_trash = true;
 			break;
@@ -287,6 +459,9 @@ static void parse_opts(int argc, char **argv)
 		case 'p':
 			cfg_port = strtoul(optarg, NULL, 0);
 			break;
+		case 'P':
+			cfg_poll = true;
+			break;
 		case 's':
 			cfg_payload_len = strtoul(optarg, NULL, 0);
 			break;
@@ -294,12 +469,22 @@ static void parse_opts(int argc, char **argv)
 			cfg_gso_size = strtoul(optarg, NULL, 0);
 			cfg_segment = true;
 			break;
+		case 'H':
+			cfg_tx_ts = SOF_TIMESTAMPING_TX_HARDWARE;
+			cfg_tx_tstamp = true;
+			break;
 		case 't':
 			cfg_tcp = true;
 			break;
+		case 'T':
+			cfg_tx_tstamp = true;
+			break;
 		case 'u':
 			cfg_connected = false;
 			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
 		case 'z':
 			cfg_zerocopy = true;
 			break;
@@ -315,6 +500,8 @@ static void parse_opts(int argc, char **argv)
 		error(1, 0, "connectionless tcp makes no sense");
 	if (cfg_segment && cfg_sendmmsg)
 		error(1, 0, "cannot combine segment offload and sendmmsg");
+	if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg))
+		error(1, 0, "Options -T and -H require either -S or -m option");
 
 	if (cfg_family == PF_INET)
 		hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
@@ -349,11 +536,80 @@ static void set_pmtu_discover(int fd, bool is_ipv4)
 		error(1, errno, "setsockopt path mtu");
 }
 
+static void set_tx_timestamping(int fd)
+{
+	int val = SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID |
+			SOF_TIMESTAMPING_OPT_TSONLY;
+
+	if (cfg_tx_ts == SOF_TIMESTAMPING_TX_SOFTWARE)
+		val |= SOF_TIMESTAMPING_SOFTWARE;
+	else
+		val |= SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)))
+		error(1, errno, "setsockopt tx timestamping");
+}
+
+static void print_audit_report(unsigned long num_msgs, unsigned long num_sends)
+{
+	unsigned long tdelta;
+
+	tdelta = tend - tstart;
+	if (!tdelta)
+		return;
+
+	fprintf(stderr, "Summary over %lu.%03lu seconds...\n",
+			tdelta / 1000, tdelta % 1000);
+	fprintf(stderr,
+		"sum %s tx: %6lu MB/s %10lu calls (%lu/s) %10lu msgs (%lu/s)\n",
+		cfg_tcp ? "tcp" : "udp",
+		((num_msgs * cfg_payload_len) >> 10) / tdelta,
+		num_sends, num_sends * 1000 / tdelta,
+		num_msgs, num_msgs * 1000 / tdelta);
+
+	if (cfg_tx_tstamp) {
+		if (stat_tx_ts_errors)
+			error(1, 0,
+			      "Expected clean TX Timestamps: %9lu msgs received %6lu errors",
+			      stat_tx_ts, stat_tx_ts_errors);
+		if (stat_tx_ts != num_sends)
+			error(1, 0,
+			      "Unexpected number of TX Timestamps: %9lu expected %9lu received",
+			      num_sends, stat_tx_ts);
+		fprintf(stderr,
+			"Tx Timestamps: %19lu received %17lu errors\n",
+			stat_tx_ts, stat_tx_ts_errors);
+	}
+
+	if (cfg_zerocopy) {
+		if (stat_zcopies != num_sends)
+			error(1, 0, "Unexpected number of Zerocopy completions: %9lu expected %9lu received",
+			      num_sends, stat_zcopies);
+		fprintf(stderr,
+			"Zerocopy acks: %19lu\n",
+			stat_zcopies);
+	}
+}
+
+static void print_report(unsigned long num_msgs, unsigned long num_sends)
+{
+	fprintf(stderr,
+		"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+		cfg_tcp ? "tcp" : "udp",
+		(num_msgs * cfg_payload_len) >> 20,
+		num_sends, num_msgs);
+
+	if (cfg_audit) {
+		total_num_msgs += num_msgs;
+		total_num_sends += num_sends;
+	}
+}
+
 int main(int argc, char **argv)
 {
 	unsigned long num_msgs, num_sends;
 	unsigned long tnow, treport, tstop;
-	int fd, i, val;
+	int fd, i, val, ret;
 
 	parse_opts(argc, argv);
 
@@ -373,8 +629,16 @@ int main(int argc, char **argv)
 
 	if (cfg_zerocopy) {
 		val = 1;
-		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+
+		ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+				 &val, sizeof(val));
+		if (ret) {
+			if (errno == ENOPROTOOPT || errno == ENOTSUPP) {
+				fprintf(stderr, "SO_ZEROCOPY not supported");
+				exit(KSFT_SKIP);
+			}
 			error(1, errno, "setsockopt zerocopy");
+		}
 	}
 
 	if (cfg_connected &&
@@ -384,8 +648,13 @@ int main(int argc, char **argv)
 	if (cfg_segment)
 		set_pmtu_discover(fd, cfg_family == PF_INET);
 
+	if (cfg_tx_tstamp)
+		set_tx_timestamping(fd);
+
 	num_msgs = num_sends = 0;
 	tnow = gettimeofday_ms();
+	tstart = tnow;
+	tend = tnow;
 	tstop = tnow + cfg_runtime_ms;
 	treport = tnow + 1000;
 
@@ -400,19 +669,15 @@ int main(int argc, char **argv)
 		else
 			num_sends += send_udp(fd, buf[i]);
 		num_msgs++;
-		if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
-			flush_zerocopy(fd);
+		if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp)
+			flush_errqueue(fd, cfg_poll);
 
 		if (cfg_msg_nr && num_msgs >= cfg_msg_nr)
 			break;
 
 		tnow = gettimeofday_ms();
-		if (tnow > treport) {
-			fprintf(stderr,
-				"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
-				cfg_tcp ? "tcp" : "udp",
-				(num_msgs * cfg_payload_len) >> 20,
-				num_sends, num_msgs);
+		if (tnow >= treport) {
+			print_report(num_msgs, num_sends);
 			num_msgs = num_sends = 0;
 			treport = tnow + 1000;
 		}
@@ -423,8 +688,18 @@ int main(int argc, char **argv)
 
 	} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
 
+	if (cfg_zerocopy || cfg_tx_tstamp)
+		flush_errqueue(fd, true);
+
 	if (close(fd))
 		error(1, errno, "close");
 
+	if (cfg_audit) {
+		tend = tnow;
+		total_num_msgs += num_msgs;
+		total_num_sends += num_sends;
+		print_audit_report(total_num_msgs, total_num_sends);
+	}
+
 	return 0;
 }
diff --git a/tools/testing/selftests/ptp/phc.sh b/tools/testing/selftests/ptp/phc.sh
new file mode 100755
index 000000000000..ac6e5a6e1d3a
--- /dev/null
+++ b/tools/testing/selftests/ptp/phc.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	settime
+	adjtime
+	adjfreq
+"
+DEV=$1
+
+##############################################################################
+# Sanity checks
+
+if [[ "$(id -u)" -ne 0 ]]; then
+	echo "SKIP: need root privileges"
+	exit 0
+fi
+
+if [[ "$DEV" == "" ]]; then
+	echo "SKIP: PTP device not provided"
+	exit 0
+fi
+
+require_command()
+{
+	local cmd=$1; shift
+
+	if [[ ! -x "$(command -v "$cmd")" ]]; then
+		echo "SKIP: $cmd not installed"
+		exit 1
+	fi
+}
+
+phc_sanity()
+{
+	phc_ctl $DEV get &> /dev/null
+
+	if [ $? != 0 ]; then
+		echo "SKIP: unknown clock $DEV: No such device"
+		exit 1
+	fi
+}
+
+require_command phc_ctl
+phc_sanity
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+	local err=$1
+
+	if [[ $RET -eq 0 && $err -ne 0 ]]; then
+		RET=$err
+	fi
+}
+
+log_test()
+{
+	local test_name=$1
+
+	if [[ $RET -ne 0 ]]; then
+		EXIT_STATUS=1
+		printf "TEST: %-60s  [FAIL]\n" "$test_name"
+		return 1
+	fi
+
+	printf "TEST: %-60s  [ OK ]\n" "$test_name"
+	return 0
+}
+
+tests_run()
+{
+	local current_test
+
+	for current_test in ${TESTS:-$ALL_TESTS}; do
+		$current_test
+	done
+}
+
+##############################################################################
+# Tests
+
+settime_do()
+{
+	local res
+
+	res=$(phc_ctl $DEV set 0 wait 120.5 get 2> /dev/null \
+		| awk '/clock time is/{print $5}' \
+		| awk -F. '{print $1}')
+
+	(( res == 120 ))
+}
+
+adjtime_do()
+{
+	local res
+
+	res=$(phc_ctl $DEV set 0 adj 10 get 2> /dev/null \
+		| awk '/clock time is/{print $5}' \
+		| awk -F. '{print $1}')
+
+	(( res == 10 ))
+}
+
+adjfreq_do()
+{
+	local res
+
+	# Set the clock to be 1% faster
+	res=$(phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2> /dev/null \
+		| awk '/clock time is/{print $5}' \
+		| awk -F. '{print $1}')
+
+	(( res == 101 ))
+}
+
+##############################################################################
+
+cleanup()
+{
+	phc_ctl $DEV freq 0.0 &> /dev/null
+	phc_ctl $DEV set &> /dev/null
+}
+
+settime()
+{
+	RET=0
+
+	settime_do
+	check_err $?
+	log_test "settime"
+	cleanup
+}
+
+adjtime()
+{
+	RET=0
+
+	adjtime_do
+	check_err $?
+	log_test "adjtime"
+	cleanup
+}
+
+adjfreq()
+{
+	RET=0
+
+	adjfreq_do
+	check_err $?
+	log_test "adjfreq"
+	cleanup
+}
+
+trap cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index f9281e8aa313..22e5da9008fd 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -12,10 +12,10 @@ REQUIREMENTS
 *  Minimum Python version of 3.4. Earlier 3.X versions may work but are not
    guaranteed.
 
-*  The kernel must have network namespace support
+*  The kernel must have network namespace support if using nsPlugin
 
 *  The kernel must have veth support available, as a veth pair is created
-   prior to running the tests.
+   prior to running the tests when using nsPlugin.
 
 *  The kernel must have the appropriate infrastructure enabled to run all tdc
    unit tests. See the config file in this directory for minimum required
@@ -53,8 +53,12 @@ commands being tested must be run as root.  The code that enforces
 execution by root uid has been moved into a plugin (see PLUGIN
 ARCHITECTURE, below).
 
-If nsPlugin is linked, all tests are executed inside a network
-namespace to prevent conflicts within the host.
+Tests that use a network device should have nsPlugin.py listed as a
+requirement for that test. nsPlugin executes all commands within a
+network namespace and creates a veth pair which may be used in those test
+cases. To disable execution within the namespace, pass the -N option
+to tdc when starting a test run; the veth pair will still be created
+by the plugin.
 
 Running tdc without any arguments will run all tests. Refer to the section
 on command line arguments for more information, or run:
@@ -154,8 +158,8 @@ action:
 netns:
   options for nsPlugin (run commands in net namespace)
 
-  -n, --namespace
-                        Run commands in namespace as specified in tdc_config.py
+  -N, --no-namespace
+                        Do not run commands in a network namespace.
 
 valgrind:
   options for valgrindPlugin (run command under test under Valgrind)
@@ -171,7 +175,8 @@ was in the tdc.py script has been moved into the plugins.
 
 The plugins are in the directory plugin-lib.  The are executed from
 directory plugins.  Put symbolic links from plugins to plugin-lib,
-and name them according to the order you want them to run.
+and name them according to the order you want them to run. This is not
+necessary if a test case being run requires a specific plugin to work.
 
 Example:
 
@@ -223,7 +228,8 @@ directory:
   - rootPlugin.py:
       implements the enforcement of running as root
   - nsPlugin.py:
-      sets up a network namespace and runs all commands in that namespace
+      sets up a network namespace and runs all commands in that namespace,
+      while also setting up dummy devices to be used in testing.
   - valgrindPlugin.py
       runs each command in the execute stage under valgrind,
       and checks for leaks.
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 203302065458..1adc4f9bb795 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -38,11 +38,11 @@ CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_ACT_BPF=m
 CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_CTINFO=m
 CONFIG_NET_ACT_SKBMOD=m
 CONFIG_NET_ACT_IFE=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_IFE_SKBMARK=m
 CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
-CONFIG_NET_CLS_IND=y
 CONFIG_NET_SCH_FIFO=y
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
index 9f0ba10c44b4..e98c36750fae 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -34,8 +34,9 @@ class SubPlugin(TdcPlugin):
             'buildebpf',
             'options for buildebpfPlugin')
         self.argparser_group.add_argument(
-            '-B', '--buildebpf', action='store_true',
-            help='build eBPF programs')
+            '--nobuildebpf', action='store_false', default=True,
+            dest='buildebpf',
+            help='Don\'t build eBPF programs')
 
         return self.argparser
 
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index a194b1af2b30..affa7f2d9670 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -18,6 +18,8 @@ class SubPlugin(TdcPlugin):
 
         if self.args.namespace:
             self._ns_create()
+        else:
+            self._ports_create()
 
     def post_suite(self, index):
         '''run commands after test_runner goes into a test loop'''
@@ -27,6 +29,8 @@ class SubPlugin(TdcPlugin):
 
         if self.args.namespace:
             self._ns_destroy()
+        else:
+            self._ports_destroy()
 
     def add_args(self, parser):
         super().add_args(parser)
@@ -34,8 +38,8 @@ class SubPlugin(TdcPlugin):
             'netns',
             'options for nsPlugin(run commands in net namespace)')
         self.argparser_group.add_argument(
-            '-n', '--namespace', action='store_true',
-            help='Run commands in namespace')
+            '-N', '--no-namespace', action='store_false', default=True,
+            dest='namespace', help='Don\'t run commands in namespace')
         return self.argparser
 
     def adjust_command(self, stage, command):
@@ -73,20 +77,30 @@ class SubPlugin(TdcPlugin):
             print('adjust_command:  return command [{}]'.format(command))
         return command
 
+    def _ports_create(self):
+        cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+        self._exec_cmd('pre', cmd)
+        cmd = 'ip link set $DEV0 up'
+        self._exec_cmd('pre', cmd)
+        if not self.args.namespace:
+            cmd = 'ip link set $DEV1 up'
+            self._exec_cmd('pre', cmd)
+
+    def _ports_destroy(self):
+        cmd = 'ip link del $DEV0'
+        self._exec_cmd('post', cmd)
+
     def _ns_create(self):
         '''
         Create the network namespace in which the tests will be run and set up
         the required network devices for it.
         '''
+        self._ports_create()
         if self.args.namespace:
             cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
             self._exec_cmd('pre', cmd)
-            cmd = 'ip link add $DEV0 type veth peer name $DEV1'
-            self._exec_cmd('pre', cmd)
             cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
             self._exec_cmd('pre', cmd)
-            cmd = 'ip link set $DEV0 up'
-            self._exec_cmd('pre', cmd)
             cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
             self._exec_cmd('pre', cmd)
             if self.args.device:
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index b074ea9b6fe8..47a3082b6661 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -54,6 +54,9 @@
             "actions",
             "bpf"
         ],
+        "plugins": {
+                "requires": "buildebpfPlugin"
+        },
         "setup": [
             [
                 "$TC action flush action bpf",
@@ -78,6 +81,9 @@
             "actions",
             "bpf"
         ],
+        "plugins": {
+                "requires": "buildebpfPlugin"
+        },
         "setup": [
             [
                 "$TC action flush action bpf",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 6e5fb3d25681..2232b21e2510 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -459,5 +459,99 @@
         "teardown": [
             "$TC actions flush action mirred"
         ]
+    },
+    {
+        "id": "4749",
+        "name": "Add batch of 32 mirred redirect egress actions with cookie",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred egress redirect dev lo index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "5c69",
+        "name": "Delete batch of 32 mirred redirect egress actions",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred egress redirect dev lo index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "d3c0",
+        "name": "Add batch of 32 mirred mirror ingress actions with cookie",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred ingress mirror dev lo index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "e684",
+        "name": "Delete batch of 32 mirred mirror ingress actions",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred ingress mirror dev lo index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
index ecd96eda7f6a..45e7e89928a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -24,8 +24,32 @@
         ]
     },
     {
+        "id": "c8cf",
+        "name": "Add skbedit action with 32-bit maximum mark",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 4294967295 pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 1",
+        "matchPattern": "action order [0-9]*: skbedit  mark 4294967295.*pipe.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
         "id": "407b",
-        "name": "Add skbedit action with invalid mark",
+        "name": "Add skbedit action with mark exceeding 32-bit maximum",
         "category": [
             "actions",
             "skbedit"
@@ -43,9 +67,7 @@
         "verifyCmd": "$TC actions list action skbedit",
         "matchPattern": "action order [0-9]*:  skbedit mark",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action skbedit"
-        ]
+        "teardown": []
     },
     {
         "id": "081d",
@@ -121,7 +143,7 @@
     },
     {
         "id": "985c",
-        "name": "Add skbedit action with invalid queue_mapping",
+        "name": "Add skbedit action with queue_mapping exceeding 16-bit maximum",
         "category": [
             "actions",
             "skbedit"
@@ -413,7 +435,7 @@
     },
     {
         "id": "a6d6",
-        "name": "Add skbedit action with index",
+        "name": "Add skbedit action with index at 32-bit maximum",
         "category": [
             "actions",
             "skbedit"
@@ -426,16 +448,38 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040",
+        "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4294967295",
         "expExitCode": "0",
-        "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "index 4040404040",
+        "verifyCmd": "$TC actions get action skbedit index 4294967295",
+        "matchPattern": "action order [0-9]*: skbedit  mark 808.*index 4294967295",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
         ]
     },
     {
+        "id": "f0f4",
+        "name": "Add skbedit action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 808 pass index 4294967297",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action skbedit index 4294967297",
+        "matchPattern": "action order [0-9]*:.*skbedit.*mark 808.*pass.*index 4294967297",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
         "id": "38f3",
         "name": "Delete skbedit action",
         "category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
index 3b97cfd7e0f8..5272049566d6 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -6,6 +6,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -25,6 +28,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -44,6 +50,114 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -57,6 +171,30 @@
         ]
     },
     {
+        "id": "c591",
+        "name": "Add fw filter with action ok by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet. Remove this when the behaviour is fixed.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact ok index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pass.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
+    {
         "id": "affe",
         "name": "Add fw filter with action continue",
         "category": [
@@ -76,6 +214,30 @@
         ]
     },
     {
+        "id": "38b3",
+        "name": "Add fw filter with action continue by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet. Remove this when the behaviour is fixed.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact continue index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action continue.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
+    {
         "id": "28bc",
         "name": "Add fw filter with action pipe",
         "category": [
@@ -95,6 +257,30 @@
         ]
     },
     {
+        "id": "6753",
+        "name": "Add fw filter with action pipe by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact pipe index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pipe.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
+    {
         "id": "8da2",
         "name": "Add fw filter with action drop",
         "category": [
@@ -114,6 +300,30 @@
         ]
     },
     {
+        "id": "6dc6",
+        "name": "Add fw filter with action drop by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact drop index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action drop.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
+    {
         "id": "9436",
         "name": "Add fw filter with action reclassify",
         "category": [
@@ -133,6 +343,30 @@
         ]
     },
     {
+        "id": "3bc2",
+        "name": "Add fw filter with action reclassify by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact reclassify index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action reclassify.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
+    {
         "id": "95bb",
         "name": "Add fw filter with action jump 10",
         "category": [
@@ -152,6 +386,30 @@
         ]
     },
     {
+        "id": "36f7",
+        "name": "Add fw filter with action jump 10 by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact jump 10 index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action jump 10.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
+    {
         "id": "3d74",
         "name": "Add fw filter with action goto chain 5",
         "category": [
@@ -728,6 +986,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: protocol 802_3 prio 3 handle 7 fw action ok"
@@ -748,6 +1009,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: prio 6 handle 2 fw action continue index 5"
@@ -768,6 +1032,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -787,6 +1054,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -806,6 +1076,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
@@ -828,6 +1101,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
@@ -850,6 +1126,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
@@ -871,6 +1150,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 4 fw action ok",
@@ -892,6 +1174,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 chain 13 fw action pipe",
@@ -913,6 +1198,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action drop"
@@ -933,6 +1221,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 3 prio 4 fw action continue"
@@ -953,6 +1244,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw action pipe"
@@ -973,6 +1267,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 fw action pipe flowid 45"
@@ -993,6 +1290,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
@@ -1013,6 +1313,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
@@ -1033,6 +1336,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 3"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 16559c436f21..0f89cd50a94b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -6,6 +6,9 @@
             "filter",
             "u32"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -25,6 +28,9 @@
             "filter",
             "matchall"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV1 clsact",
             "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok"
@@ -64,6 +70,9 @@
             "filter",
             "flower"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV2 ingress",
             "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000"
@@ -85,6 +94,9 @@
             "filter",
             "flower"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV2 ingress",
             "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
new file mode 100644
index 000000000000..f518c55f468b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
@@ -0,0 +1,102 @@
+[
+    {
+        "id": "9872",
+        "name": "Add ingress qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "5c5e",
+        "name": "Add ingress qdisc with unsupported argument",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress foorbar",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "74f6",
+        "name": "Add duplicate ingress qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "f769",
+        "name": "Delete nonexistent ingress qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "3b88",
+        "name": "Delete ingress qdisc twice",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC qdisc del dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
new file mode 100644
index 000000000000..9c792fa8ca23
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
@@ -0,0 +1,276 @@
+[
+    {
+        "id": "ddd9",
+        "name": "Add prio qdisc on egress",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "aa71",
+        "name": "Add prio qdisc on egress with handle of maximum value",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle ffff: prio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio ffff: root",
+        "matchCount": "1",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "db37",
+        "name": "Add prio qdisc on egress with invalid handle exceeding maximum value",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 10000: prio",
+        "expExitCode": "255",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 10000: root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "39d8",
+        "name": "Add prio qdisc on egress with unsupported argument",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio foorbar",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "5769",
+        "name": "Add prio qdisc on egress with 4 bands and new priomap",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "fe0f",
+        "name": "Add prio qdisc on egress with 4 bands and priomap exceeding TC_PRIO_MAX entries",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "1f91",
+        "name": "Add prio qdisc on egress with 4 bands and priomap's values exceeding bands number",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "d248",
+        "name": "Add prio qdisc on egress with invalid bands value (< 2)",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 1 priomap.*0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "1d0e",
+        "name": "Add prio qdisc on egress with invalid bands value exceeding TCQ_PRIO_BANDS",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 1024 priomap.*1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "1971",
+        "name": "Replace default prio qdisc on egress with 8 bands and new priomap",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 handle 1: root prio"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DEV1 handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 8 priomap.*1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "d88a",
+        "name": "Add duplicate prio qdisc on egress",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 handle 1: root prio"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "5948",
+        "name": "Delete nonexistent prio qdisc",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 root handle 1: prio",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "6c0a",
+        "name": "Add prio qdisc on egress with invalid format for handles",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 123^ prio",
+        "expExitCode": "255",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 123 root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "0175",
+        "name": "Delete prio qdisc twice",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio",
+            "$TC qdisc del dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 handle 1: root prio",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 5cee15659e5f..678182a2676d 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -25,6 +25,9 @@ from tdc_helper import *
 import TdcPlugin
 from TdcResults import *
 
+class PluginDependencyException(Exception):
+    def __init__(self, missing_pg):
+        self.missing_pg = missing_pg
 
 class PluginMgrTestFail(Exception):
     def __init__(self, stage, output, message):
@@ -37,7 +40,7 @@ class PluginMgr:
         super().__init__()
         self.plugins = {}
         self.plugin_instances = []
-        self.args = []
+        self.failed_plugins = {}
         self.argparser = argparser
 
         # TODO, put plugins in order
@@ -53,6 +56,64 @@ class PluginMgr:
                     self.plugins[mn] = foo
                     self.plugin_instances.append(foo.SubPlugin())
 
+    def load_plugin(self, pgdir, pgname):
+        pgname = pgname[0:-3]
+        foo = importlib.import_module('{}.{}'.format(pgdir, pgname))
+        self.plugins[pgname] = foo
+        self.plugin_instances.append(foo.SubPlugin())
+        self.plugin_instances[-1].check_args(self.args, None)
+
+    def get_required_plugins(self, testlist):
+        '''
+        Get all required plugins from the list of test cases and return
+        all unique items.
+        '''
+        reqs = []
+        for t in testlist:
+            try:
+                if 'requires' in t['plugins']:
+                    if isinstance(t['plugins']['requires'], list):
+                        reqs.extend(t['plugins']['requires'])
+                    else:
+                        reqs.append(t['plugins']['requires'])
+            except KeyError:
+                continue
+        reqs = get_unique_item(reqs)
+        return reqs
+
+    def load_required_plugins(self, reqs, parser, args, remaining):
+        '''
+        Get all required plugins from the list of test cases and load any plugin
+        that is not already enabled.
+        '''
+        pgd = ['plugin-lib', 'plugin-lib-custom']
+        pnf = []
+
+        for r in reqs:
+            if r not in self.plugins:
+                fname = '{}.py'.format(r)
+                source_path = []
+                for d in pgd:
+                    pgpath = '{}/{}'.format(d, fname)
+                    if os.path.isfile(pgpath):
+                        source_path.append(pgpath)
+                if len(source_path) == 0:
+                    print('ERROR: unable to find required plugin {}'.format(r))
+                    pnf.append(fname)
+                    continue
+                elif len(source_path) > 1:
+                    print('WARNING: multiple copies of plugin {} found, using version found')
+                    print('at {}'.format(source_path[0]))
+                pgdir = source_path[0]
+                pgdir = pgdir.split('/')[0]
+                self.load_plugin(pgdir, fname)
+        if len(pnf) > 0:
+            raise PluginDependencyException(pnf)
+
+        parser = self.call_add_args(parser)
+        (args, remaining) = parser.parse_known_args(args=remaining, namespace=args)
+        return args
+
     def call_pre_suite(self, testcount, testidlist):
         for pgn_inst in self.plugin_instances:
             pgn_inst.pre_suite(testcount, testidlist)
@@ -98,6 +159,9 @@ class PluginMgr:
             command = pgn_inst.adjust_command(stage, command)
         return command
 
+    def set_args(self, args):
+        self.args = args
+
     @staticmethod
     def _make_argparser(args):
         self.argparser = argparse.ArgumentParser(
@@ -550,6 +614,7 @@ def filter_tests_by_category(args, testlist):
 
     return answer
 
+
 def get_test_cases(args):
     """
     If a test case file is specified, retrieve tests from that file.
@@ -611,7 +676,7 @@ def get_test_cases(args):
     return allcatlist, allidlist, testcases_by_cats, alltestcases
 
 
-def set_operation_mode(pm, args):
+def set_operation_mode(pm, parser, args, remaining):
     """
     Load the test case data and process remaining arguments to determine
     what the script should do for this run, and call the appropriate
@@ -649,6 +714,12 @@ def set_operation_mode(pm, args):
             exit(0)
 
     if len(alltests):
+        req_plugins = pm.get_required_plugins(alltests)
+        try:
+            args = pm.load_required_plugins(req_plugins, parser, args, remaining)
+        except PluginDependencyException as pde:
+            print('The following plugins were not found:')
+            print('{}'.format(pde.missing_pg))
         catresults = test_runner(pm, args, alltests)
         if args.format == 'none':
             print('Test results output suppression requested\n')
@@ -686,11 +757,12 @@ def main():
     parser = pm.call_add_args(parser)
     (args, remaining) = parser.parse_known_args()
     args.NAMES = NAMES
+    pm.set_args(args)
     check_default_settings(args, remaining, pm)
     if args.verbose > 2:
         print('args is {}'.format(args))
 
-    set_operation_mode(pm, args)
+    set_operation_mode(pm, parser, args, remaining)
 
     exit(0)
 
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index 942c70c041be..b771d4c89621 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -10,6 +10,8 @@ Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
 NAMES = {
           # Substitute your own tc path here
           'TC': '/sbin/tc',
+          # Substitute your own ip path here
+          'IP': '/sbin/ip',
           # Name of veth devices to be created for the namespace
           'DEV0': 'v0p0',
           'DEV1': 'v0p1',
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index 9f35c96c88a0..0440d252c4c5 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -17,7 +17,10 @@ def get_categorized_testlist(alltests, ucat):
 
 def get_unique_item(lst):
     """ For a list, return a list of the unique items in the list. """
-    return list(set(lst))
+    if len(lst) > 1:
+        return list(set(lst))
+    else:
+        return lst
 
 
 def get_test_categories(alltests):