From 9b7e94e6e89be284df94554c8b3c08f91c5c04c3 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 22 May 2019 12:09:16 -0700
Subject: selftests: fib-onlink: Make quiet by default

Add VERBOSE argument to fib-onlink-tests.sh and make output quiet by
default. Add getopt parsing of inputs and support for -v (verbose) and
-p (pause on fail).

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib-onlink-tests.sh | 48 ++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 864f865eee55..c287b90b8af8 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -4,6 +4,7 @@
 # IPv4 and IPv6 onlink tests
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+VERBOSE=0
 
 # Network interfaces
 # - odd in current namespace; even in peer ns
@@ -91,10 +92,10 @@ log_test()
 
 	if [ ${rc} -eq ${expected} ]; then
 		nsuccess=$((nsuccess+1))
-		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+		printf "    TEST: %-50s  [ OK ]\n" "${msg}"
 	else
 		nfail=$((nfail+1))
-		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		printf "    TEST: %-50s  [FAIL]\n" "${msg}"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 			echo
 			echo "hit enter to continue, 'q' to quit"
@@ -121,9 +122,23 @@ log_subsection()
 
 run_cmd()
 {
-	echo
-	echo "COMMAND: $*"
-	eval $*
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
 }
 
 get_linklocal()
@@ -450,12 +465,35 @@ run_onlink_tests()
 	invalid_onlink_ipv6
 }
 
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -p          Pause on fail
+        -v          verbose mode (show commands and output)
+EOF
+}
+
 ################################################################################
 # main
 
 nsuccess=0
 nfail=0
 
+while getopts :t:pPhv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
 cleanup
 setup
 run_onlink_tests
-- 
cgit v1.2.3-59-g8ed1b


From a92a0a7b8e7c1e3dbc0c8a7fed7b52e7915e8c11 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 22 May 2019 12:11:06 -0700
Subject: selftests: pmtu: Simplify cleanup and namespace names

The point of the pause-on-fail argument is to leave the setup as is after
a test fails to allow a user to debug why it failed. Move the cleanup
after posting the result to the user to make it so.

Random names for the namespaces are not user friendly when trying to
debug a failure. Make them simpler and more direct for the tests. Run
cleanup at the beginning to ensure they are cleaned up if they already
exist.

Remove cleanup_done. There is no harm in doing cleanup twice; just
ignore any errors related to not existing - which is already done.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index b9171a7b3aaa..ab77e6344d17 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -152,10 +152,10 @@ tests="
 	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions
 	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions"
 
-NS_A="ns-$(mktemp -u XXXXXX)"
-NS_B="ns-$(mktemp -u XXXXXX)"
-NS_R1="ns-$(mktemp -u XXXXXX)"
-NS_R2="ns-$(mktemp -u XXXXXX)"
+NS_A="ns-A"
+NS_B="ns-B"
+NS_R1="ns-R1"
+NS_R2="ns-R2"
 ns_a="ip netns exec ${NS_A}"
 ns_b="ip netns exec ${NS_B}"
 ns_r1="ip netns exec ${NS_R1}"
@@ -212,7 +212,6 @@ dummy6_0_addr="fc00:1000::0"
 dummy6_1_addr="fc00:1001::0"
 dummy6_mask="64"
 
-cleanup_done=1
 err_buf=
 tcpdump_pids=
 
@@ -495,7 +494,7 @@ setup_routing() {
 setup() {
 	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
 
-	cleanup_done=0
+	cleanup
 	for arg do
 		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
 	done
@@ -519,11 +518,9 @@ cleanup() {
 	done
 	tcpdump_pids=
 
-	[ ${cleanup_done} -eq 1 ] && return
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns del ${n} 2> /dev/null
 	done
-	cleanup_done=1
 }
 
 mtu() {
@@ -1136,6 +1133,9 @@ done
 
 trap cleanup EXIT
 
+# start clean
+cleanup
+
 for t in ${tests}; do
 	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
 
@@ -1156,7 +1156,6 @@ for t in ${tests}; do
 
 		eval test_${name}
 		ret=$?
-		cleanup
 
 		if [ $ret -eq 0 ]; then
 			printf "TEST: %-60s  [ OK ]\n" "${t}"
-- 
cgit v1.2.3-59-g8ed1b


From 9efc7794496d531593751a3fb008030d5f9ba87c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 22 May 2019 10:51:28 -0700
Subject: libbpf: emit diff of mismatched public API, if any

It's easy to have a mismatch of "intended to be public" vs really
exposed API functions. While Makefile does check for this mismatch, if
it actually occurs it's not trivial to determine which functions are
accidentally exposed. This patch dumps out a diff showing what's not
supposed to be exposed facilitating easier fixing.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Makefile | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index f91639bf5650..a2aceadf68db 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -204,6 +204,16 @@ check_abi: $(OUTPUT)libbpf.so
 		     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
 		     "Please make sure all LIBBPF_API symbols are"	 \
 		     "versioned in $(VERSION_SCRIPT)." >&2;		 \
+		readelf -s --wide $(OUTPUT)libbpf-in.o |		 \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'|   \
+		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
+		readelf -s --wide $(OUTPUT)libbpf.so |			 \
+		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
+		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
+		diff -u $(OUTPUT)libbpf_global_syms.tmp			 \
+		     $(OUTPUT)libbpf_versioned_syms.tmp;		 \
+		rm $(OUTPUT)libbpf_global_syms.tmp			 \
+		   $(OUTPUT)libbpf_versioned_syms.tmp;			 \
 		exit 1;							 \
 	fi
 
-- 
cgit v1.2.3-59-g8ed1b


From 7c0c6095d48dcd0e67c917aa73cdbb2715aafc36 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 21 May 2019 20:14:20 -0700
Subject: selftests/bpf: adjust verifier scale test

Adjust scale tests to check for new jmp sequence limit.

BPF_JGT had to be changed to BPF_JEQ because the verifier was
too smart. It tracked the known safe range of R0 values
and pruned the search earlier before hitting exact 8192 limit.
bpf_semi_rand_get() was too (un)?lucky.

k = 0; was missing in bpf_fill_scale2.
It was testing a bit shorter sequence of jumps than intended.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 31 ++++++++++++++++-------------
 1 file changed, 17 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ccd896b98cac..6e2fec84c929 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -210,33 +210,35 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self)
 	self->retval = (uint32_t)res;
 }
 
-/* test the sequence of 1k jumps */
+#define MAX_JMP_SEQ 8192
+
+/* test the sequence of 8k jumps */
 static void bpf_fill_scale1(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
 	int i = 0, k = 0;
 
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % 64 + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	/* every jump adds 1 step to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
-	while (i < MAX_TEST_INSNS - 1025)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
 		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
 	self->retval = 42;
 }
 
-/* test the sequence of 1k jumps in inner most function (function depth 8)*/
+/* test the sequence of 8k jumps in inner most function (function depth 8)*/
 static void bpf_fill_scale2(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
@@ -248,19 +250,20 @@ static void bpf_fill_scale2(struct bpf_test *self)
 		insn[i++] = BPF_EXIT_INSN();
 	}
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
-	/* test to check that the sequence of 1024 jumps is acceptable */
-	while (k++ < 1024) {
+	/* test to check that the long sequence of jumps is acceptable */
+	k = 0;
+	while (k++ < MAX_JMP_SEQ) {
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_get_prandom_u32);
-		insn[i++] = BPF_JMP_IMM(BPF_JGT, BPF_REG_0, bpf_semi_rand_get(), 2);
+		insn[i++] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, bpf_semi_rand_get(), 2);
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_10);
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % (64 - 4 * FUNC_NEST) + 1));
 	}
-	/* every jump adds 1024 steps to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - 1025 MOVs and 1 EXIT
+	/* every jump adds 1 step to insn_processed, so to stay exactly
+	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
-	while (i < MAX_TEST_INSNS - 1025)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
 		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
-- 
cgit v1.2.3-59-g8ed1b


From 7c9441066ab53168093c79477aabd575f7c14129 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 21 May 2019 20:14:21 -0700
Subject: selftests/bpf: add pyperf scale test

Add a snippet of pyperf bpf program used to collect python stack traces
as a scale test for the verifier.

At 189 loop iterations llvm 9.0 starts ignoring '#pragma unroll'
and generates partially unrolled loop instead.
Hence use 50, 100, and 180 loop iterations to stress test.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/prog_tests/bpf_verif_scale.c     |  31 ++-
 tools/testing/selftests/bpf/progs/pyperf.h         | 268 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/pyperf100.c      |   4 +
 tools/testing/selftests/bpf/progs/pyperf180.c      |   4 +
 tools/testing/selftests/bpf/progs/pyperf50.c       |   4 +
 5 files changed, 298 insertions(+), 13 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/pyperf.h
 create mode 100644 tools/testing/selftests/bpf/progs/pyperf100.c
 create mode 100644 tools/testing/selftests/bpf/progs/pyperf180.c
 create mode 100644 tools/testing/selftests/bpf/progs/pyperf50.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index b74e2f6e96d0..ce03289c9077 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -12,7 +12,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
 	return vfprintf(stderr, "%s", args);
 }
 
-static int check_load(const char *file)
+static int check_load(const char *file, enum bpf_prog_type type)
 {
 	struct bpf_prog_load_attr attr;
 	struct bpf_object *obj = NULL;
@@ -20,7 +20,7 @@ static int check_load(const char *file)
 
 	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
 	attr.file = file;
-	attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+	attr.prog_type = type;
 	attr.log_level = 4;
 	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
 	bpf_object__close(obj);
@@ -31,19 +31,24 @@ static int check_load(const char *file)
 
 void test_bpf_verif_scale(void)
 {
-	const char *file1 = "./test_verif_scale1.o";
-	const char *file2 = "./test_verif_scale2.o";
-	const char *file3 = "./test_verif_scale3.o";
-	int err;
+	const char *scale[] = {
+		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o"
+	};
+	const char *pyperf[] = {
+		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o"
+	};
+	int err, i;
 
 	if (verifier_stats)
 		libbpf_set_print(libbpf_debug_print);
 
-	err = check_load(file1);
-	err |= check_load(file2);
-	err |= check_load(file3);
-	if (!err)
-		printf("test_verif_scale:OK\n");
-	else
-		printf("test_verif_scale:FAIL\n");
+	for (i = 0; i < ARRAY_SIZE(scale); i++) {
+		err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS);
+		printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
+	}
+
+	for (i = 0; i < ARRAY_SIZE(pyperf); i++) {
+		err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
+		printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK");
+	}
 }
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
new file mode 100644
index 000000000000..0cc5e4ee90bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define FUNCTION_NAME_LEN 64
+#define FILE_NAME_LEN 128
+#define TASK_COMM_LEN 16
+
+typedef struct {
+	int PyThreadState_frame;
+	int PyThreadState_thread;
+	int PyFrameObject_back;
+	int PyFrameObject_code;
+	int PyFrameObject_lineno;
+	int PyCodeObject_filename;
+	int PyCodeObject_name;
+	int String_data;
+	int String_size;
+} OffsetConfig;
+
+typedef struct {
+	uintptr_t current_state_addr;
+	uintptr_t tls_key_addr;
+	OffsetConfig offsets;
+	bool use_tls;
+} PidData;
+
+typedef struct {
+	uint32_t success;
+} Stats;
+
+typedef struct {
+	char name[FUNCTION_NAME_LEN];
+	char file[FILE_NAME_LEN];
+} Symbol;
+
+typedef struct {
+	uint32_t pid;
+	uint32_t tid;
+	char comm[TASK_COMM_LEN];
+	int32_t kernel_stack_id;
+	int32_t user_stack_id;
+	bool thread_current;
+	bool pthread_match;
+	bool stack_complete;
+	int16_t stack_len;
+	int32_t stack[STACK_MAX_LEN];
+
+	int has_meta;
+	int metadata;
+	char dummy_safeguard;
+} Event;
+
+
+struct bpf_elf_map {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+	__u32 flags;
+};
+
+typedef int pid_t;
+
+typedef struct {
+	void* f_back; // PyFrameObject.f_back, previous frame
+	void* f_code; // PyFrameObject.f_code, pointer to PyCodeObject
+	void* co_filename; // PyCodeObject.co_filename
+	void* co_name; // PyCodeObject.co_name
+} FrameData;
+
+static inline __attribute__((__always_inline__)) void*
+get_thread_state(void* tls_base, PidData* pidData)
+{
+	void* thread_state;
+	int key;
+
+	bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr);
+	bpf_probe_read(&thread_state, sizeof(thread_state),
+		       tls_base + 0x310 + key * 0x10 + 0x08);
+	return thread_state;
+}
+
+static inline __attribute__((__always_inline__)) bool
+get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol)
+{
+	// read data from PyFrameObject
+	bpf_probe_read(&frame->f_back,
+		       sizeof(frame->f_back),
+		       frame_ptr + pidData->offsets.PyFrameObject_back);
+	bpf_probe_read(&frame->f_code,
+		       sizeof(frame->f_code),
+		       frame_ptr + pidData->offsets.PyFrameObject_code);
+
+	// read data from PyCodeObject
+	if (!frame->f_code)
+		return false;
+	bpf_probe_read(&frame->co_filename,
+		       sizeof(frame->co_filename),
+		       frame->f_code + pidData->offsets.PyCodeObject_filename);
+	bpf_probe_read(&frame->co_name,
+		       sizeof(frame->co_name),
+		       frame->f_code + pidData->offsets.PyCodeObject_name);
+	// read actual names into symbol
+	if (frame->co_filename)
+		bpf_probe_read_str(&symbol->file,
+				   sizeof(symbol->file),
+				   frame->co_filename + pidData->offsets.String_data);
+	if (frame->co_name)
+		bpf_probe_read_str(&symbol->name,
+				   sizeof(symbol->name),
+				   frame->co_name + pidData->offsets.String_data);
+	return true;
+}
+
+struct bpf_elf_map SEC("maps") pidmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(int),
+	.size_value = sizeof(PidData),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") eventmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(int),
+	.size_value = sizeof(Event),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") symbolmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.size_key = sizeof(Symbol),
+	.size_value = sizeof(int),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") statsmap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.size_key = sizeof(Stats),
+	.size_value = sizeof(int),
+	.max_elem = 1,
+};
+
+struct bpf_elf_map SEC("maps") perfmap = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.size_key = sizeof(int),
+	.size_value = sizeof(int),
+	.max_elem = 32,
+};
+
+struct bpf_elf_map SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.size_key = sizeof(int),
+	.size_value = sizeof(long long) * 127,
+	.max_elem = 1000,
+};
+
+static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx)
+{
+	uint64_t pid_tgid = bpf_get_current_pid_tgid();
+	pid_t pid = (pid_t)(pid_tgid >> 32);
+	PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid);
+	if (!pidData)
+		return 0;
+
+	int zero = 0;
+	Event* event = bpf_map_lookup_elem(&eventmap, &zero);
+	if (!event)
+		return 0;
+
+	event->pid = pid;
+
+	event->tid = (pid_t)pid_tgid;
+	bpf_get_current_comm(&event->comm, sizeof(event->comm));
+
+	event->user_stack_id = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+	event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0);
+
+	void* thread_state_current = (void*)0;
+	bpf_probe_read(&thread_state_current,
+		       sizeof(thread_state_current),
+		       (void*)(long)pidData->current_state_addr);
+
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+	void* tls_base = (void*)task;
+
+	void* thread_state = pidData->use_tls ? get_thread_state(tls_base, pidData)
+		: thread_state_current;
+	event->thread_current = thread_state == thread_state_current;
+
+	if (pidData->use_tls) {
+		uint64_t pthread_created;
+		uint64_t pthread_self;
+		bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
+
+		bpf_probe_read(&pthread_created,
+			       sizeof(pthread_created),
+			       thread_state + pidData->offsets.PyThreadState_thread);
+		event->pthread_match = pthread_created == pthread_self;
+	} else {
+		event->pthread_match = 1;
+	}
+
+	if (event->pthread_match || !pidData->use_tls) {
+		void* frame_ptr;
+		FrameData frame;
+		Symbol sym = {};
+		int cur_cpu = bpf_get_smp_processor_id();
+
+		bpf_probe_read(&frame_ptr,
+			       sizeof(frame_ptr),
+			       thread_state + pidData->offsets.PyThreadState_frame);
+
+		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
+		if (symbol_counter == NULL)
+			return 0;
+#pragma unroll
+		/* Unwind python stack */
+		for (int i = 0; i < STACK_MAX_LEN; ++i) {
+			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
+				int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+				int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+				if (!symbol_id) {
+					bpf_map_update_elem(&symbolmap, &sym, &zero, 0);
+					symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
+					if (!symbol_id)
+						return 0;
+				}
+				if (*symbol_id == new_symbol_id)
+					(*symbol_counter)++;
+				event->stack[i] = *symbol_id;
+				event->stack_len = i + 1;
+				frame_ptr = frame.f_back;
+			}
+		}
+		event->stack_complete = frame_ptr == NULL;
+	} else {
+		event->stack_complete = 1;
+	}
+
+	Stats* stats = bpf_map_lookup_elem(&statsmap, &zero);
+	if (stats)
+		stats->success++;
+
+	event->has_meta = 0;
+	bpf_perf_event_output(ctx, &perfmap, 0, event, offsetof(Event, metadata));
+	return 0;
+}
+
+SEC("raw_tracepoint/kfree_skb")
+int on_event(struct pt_regs* ctx)
+{
+	int i, ret = 0;
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	ret |= __on_event(ctx);
+	return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf100.c b/tools/testing/selftests/bpf/progs/pyperf100.c
new file mode 100644
index 000000000000..29786325db54
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf100.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 100
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
new file mode 100644
index 000000000000..c39f559d3100
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 180
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf50.c b/tools/testing/selftests/bpf/progs/pyperf50.c
new file mode 100644
index 000000000000..ef7ce340a292
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf50.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 50
+#include "pyperf.h"
-- 
cgit v1.2.3-59-g8ed1b


From ec8105352869766569f657464c8a78d7bc0e2406 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 22 May 2019 20:28:01 -0700
Subject: selftests: Add redirect tests

Add test for ICMP redirects and exception processing. Test is setup
for later addition of tests using nexthop objects for routing.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/icmp_redirect.sh | 455 +++++++++++++++++++++++++++
 1 file changed, 455 insertions(+)
 create mode 100755 tools/testing/selftests/net/icmp_redirect.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
new file mode 100755
index 000000000000..e8d8496f5b16
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -0,0 +1,455 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# redirect test
+#
+#                     .253 +----+
+#                     +----| r1 |
+#                     |    +----+
+# +----+              |       |.1
+# | h1 |--------------+       |   10.1.1.0/30 2001:db8:1::0/126
+# +----+ .1           |       |.2
+#         172.16.1/24 |    +----+                   +----+
+#    2001:db8:16:1/64 +----| r2 |-------------------| h2 |
+#                     .254 +----+ .254           .2 +----+
+#                                    172.16.2/24
+#                                  2001:db8:16:2/64
+#
+# Route from h1 to h2 goes through r1, eth1 - connection between r1 and r2.
+# Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent
+# from r1 to h1 telling h1 to use r2 when talking to h2.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+R1_R2_N1_IP=10.1.1.1
+R2_R1_N1_IP=10.1.1.2
+
+R1_R2_N1_IP6=2001:db8:1::1
+R2_R1_N1_IP6=2001:db8:1::2
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+H2_N2_IP=172.16.2.2
+R2_N2_IP=172.16.2.254
+H2_N2_IP6=2001:db8:16:2::2
+R2_N2_IP6=2001:db8:16:2::254
+
+VRF=red
+VRF_TABLE=1111
+
+################################################################################
+# helpers
+
+log_section()
+{
+	echo
+	echo "###########################################################################"
+	echo "$*"
+	echo "###########################################################################"
+	echo
+}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+run_cmd()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "$out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+get_linklocal()
+{
+	local ns=$1
+	local dev=$2
+	local addr
+
+	addr=$(ip -netns $ns -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+	local ns
+
+	for ns in h1 h2 r1 r2; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+create_vrf()
+{
+	local ns=$1
+
+	ip -netns ${ns} link add ${VRF} type vrf table ${VRF_TABLE}
+	ip -netns ${ns} link set ${VRF} up
+	ip -netns ${ns} route add vrf ${VRF} unreachable default metric 8192
+	ip -netns ${ns} -6 route add vrf ${VRF} unreachable default metric 8192
+
+	ip -netns ${ns} addr add 127.0.0.1/8 dev ${VRF}
+	ip -netns ${ns} -6 addr add ::1 dev ${VRF} nodad
+
+	ip -netns ${ns} ru del pref 0
+	ip -netns ${ns} ru add pref 32765 from all lookup local
+	ip -netns ${ns} -6 ru del pref 0
+	ip -netns ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+setup()
+{
+	local ns
+
+	#
+	# create nodes as namespaces
+	#
+	for ns in h1 h2 r1 r2; do
+		ip netns add $ns
+		ip -netns $ns li set lo up
+
+		case "${ns}" in
+		h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+			;;
+		r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+		       ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
+		esac
+	done
+
+	#
+	# create interconnects
+	#
+	ip -netns h1 li add eth0 type veth peer name r1h1
+	ip -netns h1 li set r1h1 netns r1 name eth0 up
+
+	ip -netns h1 li add eth1 type veth peer name r2h1
+	ip -netns h1 li set r2h1 netns r2 name eth0 up
+
+	ip -netns h2 li add eth0 type veth peer name r2h2
+	ip -netns h2 li set eth0 up
+	ip -netns h2 li set r2h2 netns r2 name eth2 up
+
+	ip -netns r1 li add eth1 type veth peer name r2r1
+	ip -netns r1 li set eth1 up
+	ip -netns r1 li set r2r1 netns r2 name eth1 up
+
+	#
+	# h1
+	#
+	if [ "${WITH_VRF}" = "yes" ]; then
+		create_vrf "h1"
+		H1_VRF_ARG="vrf ${VRF}"
+		H1_PING_ARG="-I ${VRF}"
+	else
+		H1_VRF_ARG=
+		H1_PING_ARG=
+	fi
+	ip -netns h1 li add br0 type bridge
+	if [ "${WITH_VRF}" = "yes" ]; then
+		ip -netns h1 li set br0 vrf ${VRF} up
+	else
+		ip -netns h1 li set br0 up
+	fi
+	ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+	ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+	ip -netns h1 li set eth0 master br0 up
+	ip -netns h1 li set eth1 master br0 up
+
+	#
+	# h2
+	#
+	ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+	ip -netns h2 ro add default via ${R2_N2_IP} dev eth0
+	ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+	ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0
+
+	#
+	# r1
+	#
+	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30
+	ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
+
+	#
+	# r2
+	#
+	ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+	ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+	ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30
+	ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
+	ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24
+	ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
+
+	sleep 2
+
+	R1_LLADDR=$(get_linklocal r1 eth0)
+	if [ $? -ne 0 ]; then
+		echo "Error: Failed to get link-local address of r1's eth0"
+		exit 1
+	fi
+
+	R2_LLADDR=$(get_linklocal r2 eth0)
+	if [ $? -ne 0 ]; then
+		echo "Error: Failed to get link-local address of r2's eth0"
+		exit 1
+	fi
+}
+
+change_h2_mtu()
+{
+	local mtu=$1
+
+	run_cmd ip -netns h2 li set eth0 mtu ${mtu}
+	run_cmd ip -netns r2 li set eth2 mtu ${mtu}
+}
+
+check_exception()
+{
+	local mtu="$1"
+	local with_redirect="$2"
+	local desc="$3"
+
+	# From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102)
+	if [ "$VERBOSE" = "1" ]; then
+		echo "Commands to check for exception:"
+		run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
+		run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
+	fi
+
+	if [ -n "${mtu}" ]; then
+		mtu=" mtu ${mtu}"
+	fi
+	if [ "$with_redirect" = "yes" ]; then
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -q "cache <redirected> expires [0-9]*sec${mtu}"
+	elif [ -n "${mtu}" ]; then
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -q "cache expires [0-9]*sec${mtu}"
+	else
+		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+		grep -q "cache"
+	fi
+	log_test $? 0 "IPv4: ${desc}"
+
+	if [ "$with_redirect" = "yes" ]; then
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+	else
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | grep -q "${mtu}"
+	fi
+	log_test $? 0 "IPv6: ${desc}"
+}
+
+run_ping()
+{
+	local sz=$1
+
+	run_cmd ip netns exec h1 ping -q -M want -i 0.2 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+	run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.2 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+}
+
+replace_route_legacy()
+{
+	# r1 to h2 via r2 and eth0
+	run_cmd ip -netns r1    ro replace ${H2_N2}   via ${R2_N1_IP}  dev eth0
+	run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
+}
+
+initial_route_legacy()
+{
+	# r1 to h2 via r2 and eth1
+	run_cmd ip -netns r1    ro add ${H2_N2}   via ${R2_R1_N1_IP}  dev eth1
+	run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
+
+	# h1 to h2 via r1
+	# - IPv6 redirect only works if gateway is the LLA
+	run_cmd ip -netns h1    ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+	run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
+}
+
+check_connectivity()
+{
+	local rc
+
+	run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
+	rc=$?
+	run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
+	[ $? -ne 0 ] && rc=$?
+
+	return $rc
+}
+
+do_test()
+{
+	local ttype="$1"
+
+	eval initial_route_${ttype}
+
+	# verify connectivity
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken"
+		ret=1
+		return
+	fi
+
+	# redirect exception followed by mtu
+	eval replace_route_${ttype}
+	run_ping 64
+	check_exception "" "yes" "redirect exception"
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after redirect"
+		ret=1
+		return
+	fi
+
+	change_h2_mtu 1300
+	run_ping 1350
+	check_exception "1300" "yes" "redirect exception plus mtu"
+
+	# remove exceptions and restore routing
+	change_h2_mtu 1500
+	ip -netns h1 li set br0 down
+	ip -netns h1 li set br0 up
+	eval initial_route_${ttype}
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after reset"
+		ret=1
+		return
+	fi
+	check_exception "" "no" "routing reset"
+
+	# MTU exception followed by redirect
+	change_h2_mtu 1300
+	run_ping 1350
+	check_exception "1300" "no" "mtu exception"
+
+	eval replace_route_${ttype}
+	run_ping 64
+	check_exception "1300" "yes" "mtu exception plus redirect"
+
+	check_connectivity
+	if [ $? -ne 0 ]; then
+		echo "Error: Basic connectivity is broken after redirect"
+		ret=1
+		return
+	fi
+}
+
+################################################################################
+# usage
+
+usage()
+{
+        cat <<EOF
+usage: ${0##*/} OPTS
+
+	-p          Pause on fail
+	-v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :pv o
+do
+	case $o in
+                p) PAUSE_ON_FAIL=yes;;
+                v) VERBOSE=$(($VERBOSE + 1));;
+                *) usage; exit 1;;
+	esac
+done
+
+trap cleanup EXIT
+
+cleanup
+WITH_VRF=no
+setup
+
+log_section "Legacy routing"
+do_test "legacy"
+
+cleanup
+log_section "Legacy routing with VRF"
+WITH_VRF=yes
+setup
+do_test "legacy"
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
-- 
cgit v1.2.3-59-g8ed1b


From af5136f95045b6c4bb8a53d2f288a905c3bd6f25 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 23 May 2019 13:48:46 -0400
Subject: selftests/net: SO_TXTIME with ETF and FQ

The SO_TXTIME API enables packet tranmission with delayed delivery.
This is currently supported by the ETF and FQ packet schedulers.

Evaluate the interface with both schedulers. Install the scheduler
and send a variety of packets streams: without delay, with one
delayed packet, with multiple ordered delays and with reordering.
Verify that packets are released by the scheduler in expected order.

The ETF qdisc requires a timestamp in the future on every packet. It
needs a delay on the qdisc else the packet is dropped on dequeue for
having a delivery time in the past. The test value is experimentally
derived. ETF requires clock_id CLOCK_TAI. It checks this base and
drops for non-conformance.

The FQ qdisc expects clock_id CLOCK_MONOTONIC, the base used by TCP
as of commit fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC").
Within a flow there is an expecation of ordered delivery, as shown by
delivery times of test 4. The FQ qdisc does not require all packets to
have timestamps and does not drop for non-conformance.

The large (msec) delays are chosen to avoid flakiness.

	Output:

	SO_TXTIME ipv6 clock monotonic
	payload:a delay:28 expected:0 (us)

	SO_TXTIME ipv4 clock monotonic
	payload:a delay:38 expected:0 (us)

	SO_TXTIME ipv6 clock monotonic
	payload:a delay:40 expected:0 (us)

	SO_TXTIME ipv4 clock monotonic
	payload:a delay:33 expected:0 (us)

	SO_TXTIME ipv6 clock monotonic
	payload:a delay:10120 expected:10000 (us)

	SO_TXTIME ipv4 clock monotonic
	payload:a delay:10102 expected:10000 (us)

	[.. etc ..]

	OK. All tests passed

Changes v1->v2: update commit message output

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/.gitignore   |   1 +
 tools/testing/selftests/net/Makefile     |   3 +-
 tools/testing/selftests/net/config       |   2 +
 tools/testing/selftests/net/so_txtime.c  | 296 +++++++++++++++++++++++++++++++
 tools/testing/selftests/net/so_txtime.sh |  31 ++++
 5 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/so_txtime.c
 create mode 100755 tools/testing/selftests/net/so_txtime.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6f81130605d7..27ef4d07ac91 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -17,3 +17,4 @@ tcp_inq
 tls
 txring_overwrite
 ip_defrag
+so_txtime
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 1e6d14d2825c..8af7869e0f1c 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -9,12 +9,13 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh
+TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
+TEST_GEN_FILES += so_txtime
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 474040448601..89f84b5118bf 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -25,3 +25,5 @@ CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_ETF=m
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
new file mode 100644
index 000000000000..53f598f06647
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test the SO_TXTIME API
+ *
+ * Takes two streams of { payload, delivery time }[], one input and one output.
+ * Sends the input stream and verifies arrival matches the output stream.
+ * The two streams can differ due to out-of-order delivery and drops.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/net_tstamp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+static int	cfg_clockid	= CLOCK_TAI;
+static bool	cfg_do_ipv4;
+static bool	cfg_do_ipv6;
+static uint16_t	cfg_port	= 8000;
+static int	cfg_variance_us	= 2000;
+
+static uint64_t glob_tstart;
+
+/* encode one timed transmission (of a 1B payload) */
+struct timed_send {
+	char	data;
+	int64_t	delay_us;
+};
+
+#define MAX_NUM_PKT	8
+static struct timed_send cfg_in[MAX_NUM_PKT];
+static struct timed_send cfg_out[MAX_NUM_PKT];
+static int cfg_num_pkt;
+
+static uint64_t gettime_ns(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(cfg_clockid, &ts))
+		error(1, errno, "gettime");
+
+	return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
+}
+
+static void do_send_one(int fdt, struct timed_send *ts)
+{
+	char control[CMSG_SPACE(sizeof(uint64_t))];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+	uint64_t tdeliver;
+	int ret;
+
+	iov.iov_base = &ts->data;
+	iov.iov_len = 1;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	if (ts->delay_us >= 0) {
+		memset(control, 0, sizeof(control));
+		msg.msg_control = &control;
+		msg.msg_controllen = sizeof(control);
+
+		tdeliver = glob_tstart + ts->delay_us * 1000;
+
+		cm = CMSG_FIRSTHDR(&msg);
+		cm->cmsg_level = SOL_SOCKET;
+		cm->cmsg_type = SCM_TXTIME;
+		cm->cmsg_len = CMSG_LEN(sizeof(tdeliver));
+		memcpy(CMSG_DATA(cm), &tdeliver, sizeof(tdeliver));
+	}
+
+	ret = sendmsg(fdt, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret == 0)
+		error(1, 0, "write: 0B");
+
+}
+
+static void do_recv_one(int fdr, struct timed_send *ts)
+{
+	int64_t tstop, texpect;
+	char rbuf[2];
+	int ret;
+
+	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+	if (ret == -1)
+		error(1, errno, "read");
+	if (ret != 1)
+		error(1, 0, "read: %dB", ret);
+
+	tstop = (gettime_ns() - glob_tstart) / 1000;
+	texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
+
+	fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n",
+			rbuf[0], tstop, texpect);
+
+	if (rbuf[0] != ts->data)
+		error(1, 0, "payload mismatch. expected %c", ts->data);
+
+	if (labs(tstop - texpect) > cfg_variance_us)
+		error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+}
+
+static void do_recv_verify_empty(int fdr)
+{
+	char rbuf[1];
+	int ret;
+
+	ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+	if (ret != -1 || errno != EAGAIN)
+		error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
+}
+
+static void setsockopt_txtime(int fd)
+{
+	struct sock_txtime so_txtime_val = { .clockid = cfg_clockid };
+	struct sock_txtime so_txtime_val_read = { 0 };
+	socklen_t vallen = sizeof(so_txtime_val);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+		       &so_txtime_val, sizeof(so_txtime_val)))
+		error(1, errno, "setsockopt txtime");
+
+	if (getsockopt(fd, SOL_SOCKET, SO_TXTIME,
+		       &so_txtime_val_read, &vallen))
+		error(1, errno, "getsockopt txtime");
+
+	if (vallen != sizeof(so_txtime_val) ||
+	    memcmp(&so_txtime_val, &so_txtime_val_read, vallen))
+		error(1, 0, "getsockopt txtime: mismatch");
+}
+
+static int setup_tx(struct sockaddr *addr, socklen_t alen)
+{
+	int fd;
+
+	fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	if (connect(fd, addr, alen))
+		error(1, errno, "connect");
+
+	setsockopt_txtime(fd);
+
+	return fd;
+}
+
+static int setup_rx(struct sockaddr *addr, socklen_t alen)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fd;
+
+	fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (bind(fd, addr, alen))
+		error(1, errno, "bind");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	return fd;
+}
+
+static void do_test(struct sockaddr *addr, socklen_t alen)
+{
+	int fdt, fdr, i;
+
+	fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
+			addr->sa_family == PF_INET ? '4' : '6',
+			cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
+
+	fdt = setup_tx(addr, alen);
+	fdr = setup_rx(addr, alen);
+
+	glob_tstart = gettime_ns();
+
+	for (i = 0; i < cfg_num_pkt; i++)
+		do_send_one(fdt, &cfg_in[i]);
+	for (i = 0; i < cfg_num_pkt; i++)
+		do_recv_one(fdr, &cfg_out[i]);
+
+	do_recv_verify_empty(fdr);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdt))
+		error(1, errno, "close t");
+}
+
+static int parse_io(const char *optarg, struct timed_send *array)
+{
+	char *arg, *tok;
+	int aoff = 0;
+
+	arg = strdup(optarg);
+	if (!arg)
+		error(1, errno, "strdup");
+
+	while ((tok = strtok(arg, ","))) {
+		arg = NULL;	/* only pass non-zero on first call */
+
+		if (aoff / 2 == MAX_NUM_PKT)
+			error(1, 0, "exceeds max pkt count (%d)", MAX_NUM_PKT);
+
+		if (aoff & 1) {	/* parse delay */
+			array->delay_us = strtol(tok, NULL, 0) * 1000;
+			array++;
+		} else {	/* parse character */
+			array->data = tok[0];
+		}
+
+		aoff++;
+	}
+
+	free(arg);
+
+	return aoff / 2;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c, ilen, olen;
+
+	while ((c = getopt(argc, argv, "46c:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'c':
+			if (!strcmp(optarg, "tai"))
+				cfg_clockid = CLOCK_TAI;
+			else if (!strcmp(optarg, "monotonic") ||
+				 !strcmp(optarg, "mono"))
+				cfg_clockid = CLOCK_MONOTONIC;
+			else
+				error(1, 0, "unknown clock id %s", optarg);
+			break;
+		default:
+			error(1, 0, "parse error at %d", optind);
+		}
+	}
+
+	if (argc - optind != 2)
+		error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+
+	ilen = parse_io(argv[optind], cfg_in);
+	olen = parse_io(argv[optind + 1], cfg_out);
+	if (ilen != olen)
+		error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
+	cfg_num_pkt = ilen;
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_do_ipv6) {
+		struct sockaddr_in6 addr6 = {0};
+
+		addr6.sin6_family = AF_INET6;
+		addr6.sin6_port = htons(cfg_port);
+		addr6.sin6_addr = in6addr_loopback;
+		do_test((void *)&addr6, sizeof(addr6));
+	}
+
+	if (cfg_do_ipv4) {
+		struct sockaddr_in addr4 = {0};
+
+		addr4.sin_family = AF_INET;
+		addr4.sin_port = htons(cfg_port);
+		addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		do_test((void *)&addr4, sizeof(addr4));
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
new file mode 100755
index 000000000000..5aa519328a5b
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for the SO_TXTIME interface
+
+# Run in network namespace
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh $0 __subprocess
+	exit $?
+fi
+
+set -e
+
+tc qdisc add dev lo root fq
+./so_txtime -4 -6 -c mono a,-1 a,-1
+./so_txtime -4 -6 -c mono a,0 a,0
+./so_txtime -4 -6 -c mono a,10 a,10
+./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
+./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
+
+if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then
+	! ./so_txtime -4 -6 -c tai a,-1 a,-1
+	! ./so_txtime -4 -6 -c tai a,0 a,0
+	./so_txtime -4 -6 -c tai a,10 a,10
+	./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
+	./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+else
+	echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+fi
+
+echo OK. All tests passed
-- 
cgit v1.2.3-59-g8ed1b


From 37739d1b4fe744da9c2f342224000ae7fbb5c063 Mon Sep 17 00:00:00 2001
From: Michal Rostecki <mrostecki@opensuse.org>
Date: Thu, 23 May 2019 14:53:54 +0200
Subject: selftests: bpf: Move bpf_printk to bpf_helpers.h

bpf_printk is a macro which is commonly used to print out debug messages
in BPF programs and it was copied in many selftests and samples. Since
all of them include bpf_helpers.h, this change moves the macro there.

Signed-off-by: Michal Rostecki <mrostecki@opensuse.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_helpers.h                | 8 ++++++++
 tools/testing/selftests/bpf/progs/sockmap_parse_prog.c   | 7 -------
 tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c | 7 -------
 tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c | 7 -------
 tools/testing/selftests/bpf/progs/test_lwt_seg6local.c   | 7 -------
 tools/testing/selftests/bpf/progs/test_xdp_noinline.c    | 7 -------
 tools/testing/selftests/bpf/test_sockmap_kern.h          | 7 -------
 7 files changed, 8 insertions(+), 42 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 5f6f9e7aba2a..4b27840b8109 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -8,6 +8,14 @@
  */
 #define SEC(NAME) __attribute__((section(NAME), used))
 
+/* helper macro to print out debug messages */
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			 ##__VA_ARGS__);		\
+})
+
 /* helper functions called from eBPF programs written in C */
 static void *(*bpf_map_lookup_elem)(void *map, const void *key) =
 	(void *) BPF_FUNC_map_lookup_elem;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index 0f92858f6226..ed3e4a551c57 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -5,13 +5,6 @@
 
 int _version SEC("version") = 1;
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index 12a7b5c82ed6..65fbfdb6cd3a 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -5,13 +5,6 @@
 
 int _version SEC("version") = 1;
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 SEC("sk_msg1")
 int bpf_prog1(struct sk_msg_md *msg)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index 2ce7634a4012..bdc22be46f2e 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -5,13 +5,6 @@
 
 int _version SEC("version") = 1;
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 struct bpf_map_def SEC("maps") sock_map_rx = {
 	.type = BPF_MAP_TYPE_SOCKMAP,
 	.key_size = sizeof(int),
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index 0575751bc1bc..7c7cb3177463 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -6,13 +6,6 @@
 #include "bpf_helpers.h"
 #include "bpf_endian.h"
 
-#define bpf_printk(fmt, ...)				\
-({							\
-	char ____fmt[] = fmt;				\
-	bpf_trace_printk(____fmt, sizeof(____fmt),	\
-			##__VA_ARGS__);			\
-})
-
 /* Packet parsing state machine helpers. */
 #define cursor_advance(_cursor, _len) \
 	({ void *_tmp = _cursor; _cursor += _len; _tmp; })
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 5e4aac74f9d0..4fe6aaad22a4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -15,13 +15,6 @@
 #include <linux/udp.h>
 #include "bpf_helpers.h"
 
-#define bpf_printk(fmt, ...)				\
-({							\
-	char ____fmt[] = fmt;				\
-	bpf_trace_printk(____fmt, sizeof(____fmt),	\
-			##__VA_ARGS__);			\
-})
-
 static __u32 rol32(__u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index e7639f66a941..4e7d3da21357 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -28,13 +28,6 @@
  * are established and verdicts are decided.
  */
 
-#define bpf_printk(fmt, ...)					\
-({								\
-	       char ____fmt[] = fmt;				\
-	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
-				##__VA_ARGS__);			\
-})
-
 struct bpf_map_def SEC("maps") sock_map = {
 	.type = TEST_MAP_TYPE,
 	.key_size = sizeof(int),
-- 
cgit v1.2.3-59-g8ed1b


From 1d7a08b3bdaec1e25ba7979ff598272b7e34318f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:58:56 -0700
Subject: libbpf: ensure libbpf.h is included along libbpf_internal.h

libbpf_internal.h expects a bunch of stuff defined in libbpf.h to be
defined. This patch makes sure that libbpf.h is always included.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf_internal.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index f3025b4d90e1..850f7bdec5cb 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -9,6 +9,8 @@
 #ifndef __LIBBPF_LIBBPF_INTERNAL_H
 #define __LIBBPF_LIBBPF_INTERNAL_H
 
+#include "libbpf.h"
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
-- 
cgit v1.2.3-59-g8ed1b


From e6c64855fd7ad565d87b2bd617f4a50d3bdad82f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:58:57 -0700
Subject: libbpf: add btf__parse_elf API to load .BTF and .BTF.ext

Loading BTF and BTF.ext from ELF file is a common need. Instead of
requiring every user to re-implement it, let's provide this API from
libbpf itself. It's mostly copy/paste from `bpftool btf dump`
implementation, which will be switched to libbpf's version in next
patch. btf__parse_elf allows to load BTF and optionally BTF.ext.
This is also useful for tests that need to load/work with BTF, loaded
from test ELF files.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c      | 128 +++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h      |   2 +
 tools/lib/bpf/libbpf.map |   5 ++
 3 files changed, 135 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 03348c4d6bd4..6139550810a1 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4,10 +4,12 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include <linux/err.h>
 #include <linux/btf.h>
+#include <gelf.h>
 #include "btf.h"
 #include "bpf.h"
 #include "libbpf.h"
@@ -417,6 +419,132 @@ done:
 	return btf;
 }
 
+static bool btf_check_endianness(const GElf_Ehdr *ehdr)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+}
+
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+	Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
+	int err = 0, fd = -1, idx = 0;
+	struct btf *btf = NULL;
+	Elf_Scn *scn = NULL;
+	Elf *elf = NULL;
+	GElf_Ehdr ehdr;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_warning("failed to init libelf for %s\n", path);
+		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		err = -errno;
+		pr_warning("failed to open %s: %s\n", path, strerror(errno));
+		return ERR_PTR(err);
+	}
+
+	err = -LIBBPF_ERRNO__FORMAT;
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf) {
+		pr_warning("failed to open %s as ELF file\n", path);
+		goto done;
+	}
+	if (!gelf_getehdr(elf, &ehdr)) {
+		pr_warning("failed to get EHDR from %s\n", path);
+		goto done;
+	}
+	if (!btf_check_endianness(&ehdr)) {
+		pr_warning("non-native ELF endianness is not supported\n");
+		goto done;
+	}
+	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+		pr_warning("failed to get e_shstrndx from %s\n", path);
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		GElf_Shdr sh;
+		char *name;
+
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			pr_warning("failed to get section(%d) header from %s\n",
+				   idx, path);
+			goto done;
+		}
+		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!name) {
+			pr_warning("failed to get section(%d) name from %s\n",
+				   idx, path);
+			goto done;
+		}
+		if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = elf_getdata(scn, 0);
+			if (!btf_data) {
+				pr_warning("failed to get section(%d, %s) data from %s\n",
+					   idx, name, path);
+				goto done;
+			}
+			continue;
+		} else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+			btf_ext_data = elf_getdata(scn, 0);
+			if (!btf_ext_data) {
+				pr_warning("failed to get section(%d, %s) data from %s\n",
+					   idx, name, path);
+				goto done;
+			}
+			continue;
+		}
+	}
+
+	err = 0;
+
+	if (!btf_data) {
+		err = -ENOENT;
+		goto done;
+	}
+	btf = btf__new(btf_data->d_buf, btf_data->d_size);
+	if (IS_ERR(btf))
+		goto done;
+
+	if (btf_ext && btf_ext_data) {
+		*btf_ext = btf_ext__new(btf_ext_data->d_buf,
+					btf_ext_data->d_size);
+		if (IS_ERR(*btf_ext))
+			goto done;
+	} else if (btf_ext) {
+		*btf_ext = NULL;
+	}
+done:
+	if (elf)
+		elf_end(elf);
+	close(fd);
+
+	if (err)
+		return ERR_PTR(err);
+	/*
+	 * btf is always parsed before btf_ext, so no need to clean up
+	 * btf_ext, if btf loading failed
+	 */
+	if (IS_ERR(btf))
+		return btf;
+	if (btf_ext && IS_ERR(*btf_ext)) {
+		btf__free(btf);
+		err = PTR_ERR(*btf_ext);
+		return ERR_PTR(err);
+	}
+	return btf;
+}
+
 static int compare_vsi_off(const void *_a, const void *_b)
 {
 	const struct btf_var_secinfo *a = _a;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index c7b399e81fce..bded210df9e8 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -59,6 +59,8 @@ struct btf_ext_header {
 
 LIBBPF_API void btf__free(struct btf *btf);
 LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf *btf__parse_elf(const char *path,
+				      struct btf_ext **btf_ext);
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 673001787cba..6ea5ce19b9e0 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -164,3 +164,8 @@ LIBBPF_0.0.3 {
 		bpf_map_freeze;
 		btf__finalize_data;
 } LIBBPF_0.0.2;
+
+LIBBPF_0.0.4 {
+	global:
+		btf__parse_elf;
+} LIBBPF_0.0.3;
-- 
cgit v1.2.3-59-g8ed1b


From 58650cc47382af7d87282f53a5b508ac4bf734af Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:58:58 -0700
Subject: bpftool: use libbpf's btf__parse_elf API

Use btf__parse_elf() API, provided by libbpf, instead of implementing
ELF parsing by itself.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf.c | 117 ++++--------------------------------------------
 1 file changed, 8 insertions(+), 109 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 7317438ecd9e..a22ef6587ebe 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -8,8 +8,8 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
-#include <gelf.h>
 #include <bpf.h>
+#include <libbpf.h>
 #include <linux/btf.h>
 
 #include "btf.h"
@@ -340,112 +340,6 @@ static int dump_btf_raw(const struct btf *btf,
 	return 0;
 }
 
-static bool check_btf_endianness(GElf_Ehdr *ehdr)
-{
-	static unsigned int const endian = 1;
-
-	switch (ehdr->e_ident[EI_DATA]) {
-	case ELFDATA2LSB:
-		return *(unsigned char const *)&endian == 1;
-	case ELFDATA2MSB:
-		return *(unsigned char const *)&endian == 0;
-	default:
-		return 0;
-	}
-}
-
-static int btf_load_from_elf(const char *path, struct btf **btf)
-{
-	int err = -1, fd = -1, idx = 0;
-	Elf_Data *btf_data = NULL;
-	Elf_Scn *scn = NULL;
-	Elf *elf = NULL;
-	GElf_Ehdr ehdr;
-
-	if (elf_version(EV_CURRENT) == EV_NONE) {
-		p_err("failed to init libelf for %s", path);
-		return -1;
-	}
-
-	fd = open(path, O_RDONLY);
-	if (fd < 0) {
-		p_err("failed to open %s: %s", path, strerror(errno));
-		return -1;
-	}
-
-	elf = elf_begin(fd, ELF_C_READ, NULL);
-	if (!elf) {
-		p_err("failed to open %s as ELF file", path);
-		goto done;
-	}
-	if (!gelf_getehdr(elf, &ehdr)) {
-		p_err("failed to get EHDR from %s", path);
-		goto done;
-	}
-	if (!check_btf_endianness(&ehdr)) {
-		p_err("non-native ELF endianness is not supported");
-		goto done;
-	}
-	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
-		p_err("failed to get e_shstrndx from %s\n", path);
-		goto done;
-	}
-
-	while ((scn = elf_nextscn(elf, scn)) != NULL) {
-		GElf_Shdr sh;
-		char *name;
-
-		idx++;
-		if (gelf_getshdr(scn, &sh) != &sh) {
-			p_err("failed to get section(%d) header from %s",
-			      idx, path);
-			goto done;
-		}
-		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
-		if (!name) {
-			p_err("failed to get section(%d) name from %s",
-			      idx, path);
-			goto done;
-		}
-		if (strcmp(name, BTF_ELF_SEC) == 0) {
-			btf_data = elf_getdata(scn, 0);
-			if (!btf_data) {
-				p_err("failed to get section(%d, %s) data from %s",
-				      idx, name, path);
-				goto done;
-			}
-			break;
-		}
-	}
-
-	if (!btf_data) {
-		p_err("%s ELF section not found in %s", BTF_ELF_SEC, path);
-		goto done;
-	}
-
-	*btf = btf__new(btf_data->d_buf, btf_data->d_size);
-	if (IS_ERR(*btf)) {
-		err = PTR_ERR(*btf);
-		*btf = NULL;
-		p_err("failed to load BTF data from %s: %s",
-		      path, strerror(err));
-		goto done;
-	}
-
-	err = 0;
-done:
-	if (err) {
-		if (*btf) {
-			btf__free(*btf);
-			*btf = NULL;
-		}
-	}
-	if (elf)
-		elf_end(elf);
-	close(fd);
-	return err;
-}
-
 static int do_dump(int argc, char **argv)
 {
 	struct btf *btf = NULL;
@@ -522,9 +416,14 @@ static int do_dump(int argc, char **argv)
 		}
 		NEXT_ARG();
 	} else if (is_prefix(src, "file")) {
-		err = btf_load_from_elf(*argv, &btf);
-		if (err)
+		btf = btf__parse_elf(*argv, NULL);
+		if (IS_ERR(btf)) {
+			err = PTR_ERR(btf);
+			btf = NULL;
+			p_err("failed to load BTF from %s: %s", 
+			      *argv, strerror(err));
 			goto done;
+		}
 		NEXT_ARG();
 	} else {
 		err = -1;
-- 
cgit v1.2.3-59-g8ed1b


From 9db324314d29442c8bb8212dd40a3bb26f86c1c9 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:58:59 -0700
Subject: selftests/bpf: use btf__parse_elf to check presence of BTF/BTF.ext

Switch test_btf.c to rely on btf__parse_elf to check presence of BTF and
BTF.ext data, instead of implementing its own ELF parsing.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_btf.c | 71 +++++++---------------------------
 1 file changed, 13 insertions(+), 58 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 42c1ce988945..289daf54dec4 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4025,62 +4025,13 @@ static struct btf_file_test file_tests[] = {
 },
 };
 
-static int file_has_btf_elf(const char *fn, bool *has_btf_ext)
-{
-	Elf_Scn *scn = NULL;
-	GElf_Ehdr ehdr;
-	int ret = 0;
-	int elf_fd;
-	Elf *elf;
-
-	if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
-		  "elf_version(EV_CURRENT) == EV_NONE"))
-		return -1;
-
-	elf_fd = open(fn, O_RDONLY);
-	if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
-		return -1;
-
-	elf = elf_begin(elf_fd, ELF_C_READ, NULL);
-	if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
-		ret = -1;
-		goto done;
-	}
-
-	if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
-		ret = -1;
-		goto done;
-	}
-
-	while ((scn = elf_nextscn(elf, scn))) {
-		const char *sh_name;
-		GElf_Shdr sh;
-
-		if (CHECK(gelf_getshdr(scn, &sh) != &sh,
-			  "file:%s gelf_getshdr != &sh", fn)) {
-			ret = -1;
-			goto done;
-		}
-
-		sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
-		if (!strcmp(sh_name, BTF_ELF_SEC))
-			ret = 1;
-		if (!strcmp(sh_name, BTF_EXT_ELF_SEC))
-			*has_btf_ext = true;
-	}
-
-done:
-	close(elf_fd);
-	elf_end(elf);
-	return ret;
-}
-
 static int do_test_file(unsigned int test_num)
 {
 	const struct btf_file_test *test = &file_tests[test_num - 1];
 	const char *expected_fnames[] = {"_dummy_tracepoint",
 					 "test_long_fname_1",
 					 "test_long_fname_2"};
+	struct btf_ext *btf_ext = NULL;
 	struct bpf_prog_info info = {};
 	struct bpf_object *obj = NULL;
 	struct bpf_func_info *finfo;
@@ -4095,15 +4046,19 @@ static int do_test_file(unsigned int test_num)
 	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
 		test->file);
 
-	err = file_has_btf_elf(test->file, &has_btf_ext);
-	if (err == -1)
-		return err;
-
-	if (err == 0) {
-		fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
-		skip_cnt++;
-		return 0;
+	btf = btf__parse_elf(test->file, &btf_ext);
+	if (IS_ERR(btf)) {
+		if (PTR_ERR(btf) == -ENOENT) {
+			fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+			skip_cnt++;
+			return 0;
+		}
+		return PTR_ERR(btf);
 	}
+	btf__free(btf);
+
+	has_btf_ext = btf_ext != NULL;
+	btf_ext__free(btf_ext);
 
 	obj = bpf_object__open(test->file);
 	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
-- 
cgit v1.2.3-59-g8ed1b


From e3b924224028c6fc31545e3812eecbe2ddbf35f6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:00 -0700
Subject: libbpf: add resizable non-thread safe internal hashmap

There is a need for fast point lookups inside libbpf for multiple use
cases (e.g., name resolution for BTF-to-C conversion, by-name lookups in
BTF for upcoming BPF CO-RE relocation support, etc). This patch
implements simple resizable non-thread safe hashmap using single linked
list chains.

Four different insert strategies are supported:
 - HASHMAP_ADD - only add key/value if key doesn't exist yet;
 - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
   update value;
 - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
   nothing and return -ENOENT;
 - HASHMAP_APPEND - always add key/value pair, even if key already exists.
   This turns hashmap into a multimap by allowing multiple values to be
   associated with the same key. Most useful read API for such hashmap is
   hashmap__for_each_key_entry() iteration. If hashmap__find() is still
   used, it will return last inserted key/value entry (first in a bucket
   chain).

For HASHMAP_SET and HASHMAP_UPDATE, old key/value pair is returned, so
that calling code can handle proper memory management, if necessary.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Build     |   2 +-
 tools/lib/bpf/hashmap.c | 229 ++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/hashmap.h | 173 ++++++++++++++++++++++++++++++++++++
 3 files changed, 403 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/bpf/hashmap.c
 create mode 100644 tools/lib/bpf/hashmap.h

(limited to 'tools')

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index ee9d5362f35b..dcf0d36598e0 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
new file mode 100644
index 000000000000..6122272943e6
--- /dev/null
+++ b/tools/lib/bpf/hashmap.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	entry->next = *pprev;
+	*pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	*pprev = entry->next;
+	entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx)
+{
+	map->hash_fn = hash_fn;
+	map->equal_fn = equal_fn;
+	map->ctx = ctx;
+
+	map->buckets = NULL;
+	map->cap = 0;
+	map->cap_bits = 0;
+	map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx)
+{
+	struct hashmap *map = malloc(sizeof(struct hashmap));
+
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+	hashmap__init(map, hash_fn, equal_fn, ctx);
+	return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+	free(map->buckets);
+	map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+	if (!map)
+		return;
+
+	hashmap__clear(map);
+	free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+	return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+	return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+	/* grow if empty or more than 75% filled */
+	return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+	struct hashmap_entry **new_buckets;
+	struct hashmap_entry *cur, *tmp;
+	size_t new_cap_bits, new_cap;
+	size_t h;
+	int bkt;
+
+	new_cap_bits = map->cap_bits + 1;
+	if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+		new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+	new_cap = 1UL << new_cap_bits;
+	new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+	if (!new_buckets)
+		return -ENOMEM;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+		hashmap_add_entry(&new_buckets[h], cur);
+	}
+
+	map->cap = new_cap;
+	map->cap_bits = new_cap_bits;
+	free(map->buckets);
+	map->buckets = new_buckets;
+
+	return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+			       const void *key, size_t hash,
+			       struct hashmap_entry ***pprev,
+			       struct hashmap_entry **entry)
+{
+	struct hashmap_entry *cur, **prev_ptr;
+
+	if (!map->buckets)
+		return false;
+
+	for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+	     cur;
+	     prev_ptr = &cur->next, cur = cur->next) {
+		if (map->equal_fn(cur->key, key, map->ctx)) {
+			if (pprev)
+				*pprev = prev_ptr;
+			*entry = cur;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+	int err;
+
+	if (old_key)
+		*old_key = NULL;
+	if (old_value)
+		*old_value = NULL;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (strategy != HASHMAP_APPEND &&
+	    hashmap_find_entry(map, key, h, NULL, &entry)) {
+		if (old_key)
+			*old_key = entry->key;
+		if (old_value)
+			*old_value = entry->value;
+
+		if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+			entry->key = key;
+			entry->value = value;
+			return 0;
+		} else if (strategy == HASHMAP_ADD) {
+			return -EEXIST;
+		}
+	}
+
+	if (strategy == HASHMAP_UPDATE)
+		return -ENOENT;
+
+	if (hashmap_needs_to_grow(map)) {
+		err = hashmap_grow(map);
+		if (err)
+			return err;
+		h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	}
+
+	entry = malloc(sizeof(struct hashmap_entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->key = key;
+	entry->value = value;
+	hashmap_add_entry(&map->buckets[h], entry);
+	map->sz++;
+
+	return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, NULL, &entry))
+		return false;
+
+	if (value)
+		*value = entry->value;
+	return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value)
+{
+	struct hashmap_entry **pprev, *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+		return false;
+
+	if (old_key)
+		*old_key = entry->key;
+	if (old_value)
+		*old_value = entry->value;
+
+	hashmap_del_entry(pprev, entry);
+	free(entry);
+	map->sz--;
+
+	return true;
+}
+
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
new file mode 100644
index 000000000000..03748a742146
--- /dev/null
+++ b/tools/lib/bpf/hashmap.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include "libbpf_internal.h"
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+	/* shuffle bits and return requested number of upper bits */
+	return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+	const void *key;
+	void *value;
+	struct hashmap_entry *next;
+};
+
+struct hashmap {
+	hashmap_hash_fn hash_fn;
+	hashmap_equal_fn equal_fn;
+	void *ctx;
+
+	struct hashmap_entry **buckets;
+	size_t cap;
+	size_t cap_bits;
+	size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) {	\
+	.hash_fn = (hash_fn),			\
+	.equal_fn = (equal_fn),			\
+	.ctx = (ctx),				\
+	.buckets = NULL,			\
+	.cap = 0,				\
+	.cap_bits = 0,				\
+	.sz = 0,				\
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ *   update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ *   nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ *   This turns hashmap into a multimap by allowing multiple values to be
+ *   associated with the same key. Most useful read API for such hashmap is
+ *   hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ *   used, it will return last inserted key/value entry (first in a bucket
+ *   chain).
+ */
+enum hashmap_insert_strategy {
+	HASHMAP_ADD,
+	HASHMAP_SET,
+	HASHMAP_UPDATE,
+	HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+			       const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+			       const void *key, void *value,
+			       const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_SET,
+			       old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+				  const void *key, void *value,
+				  const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+			       old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+				  const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt)				    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt)		    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt];				    \
+		     cur && ({tmp = cur->next; true; });		    \
+		     cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key)			    \
+	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+					     map->cap_bits);		    \
+		     map->buckets ? map->buckets[bkt] : NULL; });	    \
+	     cur;							    \
+	     cur = cur->next)						    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
+	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+					     map->cap_bits);		    \
+		     cur = map->buckets ? map->buckets[bkt] : NULL; });	    \
+	     cur && ({ tmp = cur->next; true; });			    \
+	     cur = tmp)							    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
-- 
cgit v1.2.3-59-g8ed1b


From 5d04ec687cf98bc6e8073c646994736a46ed4e24 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:01 -0700
Subject: selftests/bpf: add tests for libbpf's hashmap

Test all APIs for internal hashmap implementation.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore     |   1 +
 tools/testing/selftests/bpf/Makefile       |   2 +-
 tools/testing/selftests/bpf/test_hashmap.c | 382 +++++++++++++++++++++++++++++
 3 files changed, 384 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_hashmap.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index dd5d69529382..138b6c063916 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -35,3 +35,4 @@ test_sysctl
 alu32
 libbpf.pc
 libbpf.so.*
+test_hashmap
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 66f2dca1dee1..ddae06498a00 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
-	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl
+	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c
new file mode 100644
index 000000000000..b64094c981e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_hashmap.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Tests for libbpf's hashmap.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+#define CHECK(condition, format...) ({					\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
+		fprintf(stderr, format);				\
+	}								\
+	__ret;								\
+})
+
+size_t hash_fn(const void *k, void *ctx)
+{
+	return (long)k;
+}
+
+bool equal_fn(const void *a, const void *b, void *ctx)
+{
+	return (long)a == (long)b;
+}
+
+static inline size_t next_pow_2(size_t n)
+{
+	size_t r = 1;
+
+	while (r < n)
+		r <<= 1;
+	return r;
+}
+
+static inline size_t exp_cap(size_t sz)
+{
+	size_t r = next_pow_2(sz);
+
+	if (sz * 4 / 3 > r)
+		r <<= 1;
+	return r;
+}
+
+#define ELEM_CNT 62
+
+int test_hashmap_generic(void)
+{
+	struct hashmap_entry *entry, *tmp;
+	int err, bkt, found_cnt, i;
+	long long found_msk;
+	struct hashmap *map;
+
+	fprintf(stderr, "%s: ", __func__);
+
+	map = hashmap__new(hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
+		return 1;
+
+	for (i = 0; i < ELEM_CNT; i++) {
+		const void *oldk, *k = (const void *)(long)i;
+		void *oldv, *v = (void *)(long)(1024 + i);
+
+		err = hashmap__update(map, k, v, &oldk, &oldv);
+		if (CHECK(err != -ENOENT, "unexpected result: %d\n", err))
+			return 1;
+
+		if (i % 2) {
+			err = hashmap__add(map, k, v);
+		} else {
+			err = hashmap__set(map, k, v, &oldk, &oldv);
+			if (CHECK(oldk != NULL || oldv != NULL,
+				  "unexpected k/v: %p=%p\n", oldk, oldv))
+				return 1;
+		}
+
+		if (CHECK(err, "failed to add k/v %ld = %ld: %d\n",
+			       (long)k, (long)v, err))
+			return 1;
+
+		if (CHECK(!hashmap__find(map, k, &oldv),
+			  "failed to find key %ld\n", (long)k))
+			return 1;
+		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
+			return 1;
+	}
+
+	if (CHECK(hashmap__size(map) != ELEM_CNT,
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+
+	found_msk = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		long k = (long)entry->key;
+		long v = (long)entry->value;
+
+		found_msk |= 1ULL << k;
+		if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v))
+			return 1;
+	}
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+		  "not all keys iterated: %llx\n", found_msk))
+		return 1;
+
+	for (i = 0; i < ELEM_CNT; i++) {
+		const void *oldk, *k = (const void *)(long)i;
+		void *oldv, *v = (void *)(long)(256 + i);
+
+		err = hashmap__add(map, k, v);
+		if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err))
+			return 1;
+
+		if (i % 2)
+			err = hashmap__update(map, k, v, &oldk, &oldv);
+		else
+			err = hashmap__set(map, k, v, &oldk, &oldv);
+
+		if (CHECK(err, "failed to update k/v %ld = %ld: %d\n",
+			       (long)k, (long)v, err))
+			return 1;
+		if (CHECK(!hashmap__find(map, k, &oldv),
+			  "failed to find key %ld\n", (long)k))
+			return 1;
+		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
+			return 1;
+	}
+
+	if (CHECK(hashmap__size(map) != ELEM_CNT,
+		  "invalid updated map size: %zu\n", hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+
+	found_msk = 0;
+	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
+		long k = (long)entry->key;
+		long v = (long)entry->value;
+
+		found_msk |= 1ULL << k;
+		if (CHECK(v - k != 256,
+			  "invalid updated k/v pair: %ld = %ld\n", k, v))
+			return 1;
+	}
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+		  "not all keys iterated after update: %llx\n", found_msk))
+		return 1;
+
+	found_cnt = 0;
+	hashmap__for_each_key_entry(map, entry, (void *)0) {
+		found_cnt++;
+	}
+	if (CHECK(!found_cnt, "didn't find any entries for key 0\n"))
+		return 1;
+
+	found_msk = 0;
+	found_cnt = 0;
+	hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
+		const void *oldk, *k;
+		void *oldv, *v;
+
+		k = entry->key;
+		v = entry->value;
+
+		found_cnt++;
+		found_msk |= 1ULL << (long)k;
+
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+			  "failed to delete k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			return 1;
+		if (CHECK(oldk != k || oldv != v,
+			  "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
+			  (long)k, (long)v, (long)oldk, (long)oldv))
+			return 1;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+			  "unexpectedly deleted k/v %ld = %ld\n",
+			  (long)oldk, (long)oldv))
+			return 1;
+	}
+
+	if (CHECK(!found_cnt || !found_msk,
+		  "didn't delete any key entries\n"))
+		return 1;
+	if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt,
+		  "invalid updated map size (already deleted: %d): %zu\n",
+		  found_cnt, hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+
+	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
+		const void *oldk, *k;
+		void *oldv, *v;
+
+		k = entry->key;
+		v = entry->value;
+
+		found_cnt++;
+		found_msk |= 1ULL << (long)k;
+
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+			  "failed to delete k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			return 1;
+		if (CHECK(oldk != k || oldv != v,
+			  "invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
+			  (long)k, (long)v, (long)oldk, (long)oldv))
+			return 1;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+			  "unexpectedly deleted k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			return 1;
+	}
+
+	if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
+		  "not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
+		  found_cnt, found_msk))
+		return 1;
+	if (CHECK(hashmap__size(map) != 0,
+		  "invalid updated map size (already deleted: %d): %zu\n",
+		  found_cnt, hashmap__size(map)))
+		return 1;
+
+	found_cnt = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "unexpected map entries left: %ld = %ld\n",
+			     (long)entry->key, (long)entry->value);
+		return 1;
+	}
+
+	hashmap__free(map);
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "unexpected map entries left: %ld = %ld\n",
+			     (long)entry->key, (long)entry->value);
+		return 1;
+	}
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
+
+size_t collision_hash_fn(const void *k, void *ctx)
+{
+	return 0;
+}
+
+int test_hashmap_multimap(void)
+{
+	void *k1 = (void *)0, *k2 = (void *)1;
+	struct hashmap_entry *entry;
+	struct hashmap *map;
+	long found_msk;
+	int err, bkt;
+
+	fprintf(stderr, "%s: ", __func__);
+
+	/* force collisions */
+	map = hashmap__new(collision_hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
+		return 1;
+
+
+	/* set up multimap:
+	 * [0] -> 1, 2, 4;
+	 * [1] -> 8, 16, 32;
+	 */
+	err = hashmap__append(map, k1, (void *)1);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k1, (void *)2);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k1, (void *)4);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+
+	err = hashmap__append(map, k2, (void *)8);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k2, (void *)16);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+	err = hashmap__append(map, k2, (void *)32);
+	if (CHECK(err, "failed to add k/v: %d\n", err))
+		return 1;
+
+	if (CHECK(hashmap__size(map) != 6,
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		return 1;
+
+	/* verify global iteration still works and sees all values */
+	found_msk = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (1 << 6) - 1,
+		  "not all keys iterated: %lx\n", found_msk))
+		return 1;
+
+	/* iterate values for key 1 */
+	found_msk = 0;
+	hashmap__for_each_key_entry(map, entry, k1) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (1 | 2 | 4),
+		  "invalid k1 values: %lx\n", found_msk))
+		return 1;
+
+	/* iterate values for key 2 */
+	found_msk = 0;
+	hashmap__for_each_key_entry(map, entry, k2) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (8 | 16 | 32),
+		  "invalid k2 values: %lx\n", found_msk))
+		return 1;
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
+
+int test_hashmap_empty()
+{
+	struct hashmap_entry *entry;
+	int bkt;
+	struct hashmap *map;
+	void *k = (void *)0;
+
+	fprintf(stderr, "%s: ", __func__);
+
+	/* force collisions */
+	map = hashmap__new(hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
+		return 1;
+
+	if (CHECK(hashmap__size(map) != 0,
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		return 1;
+	if (CHECK(hashmap__capacity(map) != 0,
+		  "invalid map capacity: %zu\n", hashmap__capacity(map)))
+		return 1;
+	if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n"))
+		return 1;
+	if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n"))
+		return 1;
+
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "unexpected iterated entry\n");
+		return 1;
+	}
+	hashmap__for_each_key_entry(map, entry, k) {
+		CHECK(false, "unexpected key entry\n");
+		return 1;
+	}
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	bool failed = false;
+
+	if (test_hashmap_generic())
+		failed = true;
+	if (test_hashmap_multimap())
+		failed = true;
+	if (test_hashmap_empty())
+		failed = true;
+
+	return failed;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 2fc3fc0bcdcc5429947d1e5ed8f6f0a77ba80997 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:02 -0700
Subject: libbpf: switch btf_dedup() to hashmap for dedup table

Utilize libbpf's hashmap as a multimap fof dedup_table implementation.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/btf.c | 201 ++++++++++++++++++++++------------------------------
 1 file changed, 85 insertions(+), 116 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 6139550810a1..b2478e98c367 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -14,6 +14,7 @@
 #include "bpf.h"
 #include "libbpf.h"
 #include "libbpf_internal.h"
+#include "hashmap.h"
 
 #define max(a, b) ((a) > (b) ? (a) : (b))
 #define min(a, b) ((a) < (b) ? (a) : (b))
@@ -1293,16 +1294,9 @@ done:
 	return err;
 }
 
-#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
-#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31
 #define BTF_UNPROCESSED_ID ((__u32)-1)
 #define BTF_IN_PROGRESS_ID ((__u32)-2)
 
-struct btf_dedup_node {
-	struct btf_dedup_node *next;
-	__u32 type_id;
-};
-
 struct btf_dedup {
 	/* .BTF section to be deduped in-place */
 	struct btf *btf;
@@ -1318,7 +1312,7 @@ struct btf_dedup {
 	 * candidates, which is fine because we rely on subsequent
 	 * btf_xxx_equal() checks to authoritatively verify type equality.
 	 */
-	struct btf_dedup_node **dedup_table;
+	struct hashmap *dedup_table;
 	/* Canonical types map */
 	__u32 *map;
 	/* Hypothetical mapping, used during type graph equivalence checks */
@@ -1343,30 +1337,18 @@ struct btf_str_ptrs {
 	__u32 cap;
 };
 
-static inline __u32 hash_combine(__u32 h, __u32 value)
+static long hash_combine(long h, long value)
 {
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define GOLDEN_RATIO_PRIME 0x9e370001UL
-	return h * 37 + value * GOLDEN_RATIO_PRIME;
-#undef GOLDEN_RATIO_PRIME
+	return h * 31 + value;
 }
 
-#define for_each_dedup_cand(d, hash, node) \
-	for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \
-	     node;							   \
-	     node = node->next)
+#define for_each_dedup_cand(d, node, hash) \
+	hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
 
-static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
+static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
 {
-	struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
-	int bucket = hash & (d->opts.dedup_table_size - 1);
-
-	if (!node)
-		return -ENOMEM;
-	node->type_id = type_id;
-	node->next = d->dedup_table[bucket];
-	d->dedup_table[bucket] = node;
-	return 0;
+	return hashmap__append(d->dedup_table,
+			       (void *)hash, (void *)(long)type_id);
 }
 
 static int btf_dedup_hypot_map_add(struct btf_dedup *d,
@@ -1395,36 +1377,10 @@ static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
 	d->hypot_cnt = 0;
 }
 
-static void btf_dedup_table_free(struct btf_dedup *d)
-{
-	struct btf_dedup_node *head, *tmp;
-	int i;
-
-	if (!d->dedup_table)
-		return;
-
-	for (i = 0; i < d->opts.dedup_table_size; i++) {
-		while (d->dedup_table[i]) {
-			tmp = d->dedup_table[i];
-			d->dedup_table[i] = tmp->next;
-			free(tmp);
-		}
-
-		head = d->dedup_table[i];
-		while (head) {
-			tmp = head;
-			head = head->next;
-			free(tmp);
-		}
-	}
-
-	free(d->dedup_table);
-	d->dedup_table = NULL;
-}
-
 static void btf_dedup_free(struct btf_dedup *d)
 {
-	btf_dedup_table_free(d);
+	hashmap__free(d->dedup_table);
+	d->dedup_table = NULL;
 
 	free(d->map);
 	d->map = NULL;
@@ -1438,40 +1394,43 @@ static void btf_dedup_free(struct btf_dedup *d)
 	free(d);
 }
 
-/* Find closest power of two >= to size, capped at 2^max_size_log */
-static __u32 roundup_pow2_max(__u32 size, int max_size_log)
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
 {
-	int i;
+	return (size_t)key;
+}
 
-	for (i = 0; i < max_size_log  && (1U << i) < size;  i++)
-		;
-	return 1U << i;
+static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
+{
+	return 0;
 }
 
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+	return k1 == k2;
+}
 
 static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 				       const struct btf_dedup_opts *opts)
 {
 	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
+	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
 	int i, err = 0;
-	__u32 sz;
 
 	if (!d)
 		return ERR_PTR(-ENOMEM);
 
 	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
-	sz = opts && opts->dedup_table_size ? opts->dedup_table_size
-					    : BTF_DEDUP_TABLE_DEFAULT_SIZE;
-	sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG);
-	d->opts.dedup_table_size = sz;
+	/* dedup_table_size is now used only to force collisions in tests */
+	if (opts && opts->dedup_table_size == 1)
+		hash_fn = btf_dedup_collision_hash_fn;
 
 	d->btf = btf;
 	d->btf_ext = btf_ext;
 
-	d->dedup_table = calloc(d->opts.dedup_table_size,
-				sizeof(struct btf_dedup_node *));
-	if (!d->dedup_table) {
-		err = -ENOMEM;
+	d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(d->dedup_table)) {
+		err = PTR_ERR(d->dedup_table);
+		d->dedup_table = NULL;
 		goto done;
 	}
 
@@ -1790,9 +1749,9 @@ done:
 	return err;
 }
 
-static __u32 btf_hash_common(struct btf_type *t)
+static long btf_hash_common(struct btf_type *t)
 {
-	__u32 h;
+	long h;
 
 	h = hash_combine(0, t->name_off);
 	h = hash_combine(h, t->info);
@@ -1808,10 +1767,10 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
 }
 
 /* Calculate type signature hash of INT. */
-static __u32 btf_hash_int(struct btf_type *t)
+static long btf_hash_int(struct btf_type *t)
 {
 	__u32 info = *(__u32 *)(t + 1);
-	__u32 h;
+	long h;
 
 	h = btf_hash_common(t);
 	h = hash_combine(h, info);
@@ -1831,9 +1790,9 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
 }
 
 /* Calculate type signature hash of ENUM. */
-static __u32 btf_hash_enum(struct btf_type *t)
+static long btf_hash_enum(struct btf_type *t)
 {
-	__u32 h;
+	long h;
 
 	/* don't hash vlen and enum members to support enum fwd resolving */
 	h = hash_combine(0, t->name_off);
@@ -1885,11 +1844,11 @@ static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
  * as referenced type IDs equivalence is established separately during type
  * graph equivalence check algorithm.
  */
-static __u32 btf_hash_struct(struct btf_type *t)
+static long btf_hash_struct(struct btf_type *t)
 {
 	struct btf_member *member = (struct btf_member *)(t + 1);
 	__u32 vlen = BTF_INFO_VLEN(t->info);
-	__u32 h = btf_hash_common(t);
+	long h = btf_hash_common(t);
 	int i;
 
 	for (i = 0; i < vlen; i++) {
@@ -1932,10 +1891,10 @@ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
  * under assumption that they were already resolved to canonical type IDs and
  * are not going to change.
  */
-static __u32 btf_hash_array(struct btf_type *t)
+static long btf_hash_array(struct btf_type *t)
 {
 	struct btf_array *info = (struct btf_array *)(t + 1);
-	__u32 h = btf_hash_common(t);
+	long h = btf_hash_common(t);
 
 	h = hash_combine(h, info->type);
 	h = hash_combine(h, info->index_type);
@@ -1986,11 +1945,11 @@ static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
  * under assumption that they were already resolved to canonical type IDs and
  * are not going to change.
  */
-static inline __u32 btf_hash_fnproto(struct btf_type *t)
+static long btf_hash_fnproto(struct btf_type *t)
 {
 	struct btf_param *member = (struct btf_param *)(t + 1);
 	__u16 vlen = BTF_INFO_VLEN(t->info);
-	__u32 h = btf_hash_common(t);
+	long h = btf_hash_common(t);
 	int i;
 
 	for (i = 0; i < vlen; i++) {
@@ -2008,7 +1967,7 @@ static inline __u32 btf_hash_fnproto(struct btf_type *t)
  * This function is called during reference types deduplication to compare
  * FUNC_PROTO to potential canonical representative.
  */
-static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
+static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
 {
 	struct btf_param *m1, *m2;
 	__u16 vlen;
@@ -2034,7 +1993,7 @@ static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
  * IDs. This check is performed during type graph equivalence check and
  * referenced types equivalence is checked separately.
  */
-static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
+static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
 {
 	struct btf_param *m1, *m2;
 	__u16 vlen;
@@ -2065,11 +2024,12 @@ static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
 static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 {
 	struct btf_type *t = d->btf->types[type_id];
+	struct hashmap_entry *hash_entry;
 	struct btf_type *cand;
-	struct btf_dedup_node *cand_node;
 	/* if we don't find equivalent type, then we are canonical */
 	__u32 new_id = type_id;
-	__u32 h;
+	__u32 cand_id;
+	long h;
 
 	switch (BTF_INFO_KIND(t->info)) {
 	case BTF_KIND_CONST:
@@ -2088,10 +2048,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 
 	case BTF_KIND_INT:
 		h = btf_hash_int(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_int(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2099,10 +2060,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 
 	case BTF_KIND_ENUM:
 		h = btf_hash_enum(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_enum(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 			if (d->opts.dont_resolve_fwds)
@@ -2110,21 +2072,22 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 			if (btf_compat_enum(t, cand)) {
 				if (btf_is_enum_fwd(t)) {
 					/* resolve fwd to full enum */
-					new_id = cand_node->type_id;
+					new_id = cand_id;
 					break;
 				}
 				/* resolve canonical enum fwd to full enum */
-				d->map[cand_node->type_id] = type_id;
+				d->map[cand_id] = type_id;
 			}
 		}
 		break;
 
 	case BTF_KIND_FWD:
 		h = btf_hash_common(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_common(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2525,12 +2488,12 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
  */
 static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 {
-	struct btf_dedup_node *cand_node;
 	struct btf_type *cand_type, *t;
+	struct hashmap_entry *hash_entry;
 	/* if we don't find equivalent type, then we are canonical */
 	__u32 new_id = type_id;
 	__u16 kind;
-	__u32 h;
+	long h;
 
 	/* already deduped or is in process of deduping (loop detected) */
 	if (d->map[type_id] <= BTF_MAX_NR_TYPES)
@@ -2543,7 +2506,8 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 		return 0;
 
 	h = btf_hash_struct(t);
-	for_each_dedup_cand(d, h, cand_node) {
+	for_each_dedup_cand(d, hash_entry, h) {
+		__u32 cand_id = (__u32)(long)hash_entry->value;
 		int eq;
 
 		/*
@@ -2556,17 +2520,17 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 		 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
 		 * FWD and compatible STRUCT/UNION are considered equivalent.
 		 */
-		cand_type = d->btf->types[cand_node->type_id];
+		cand_type = d->btf->types[cand_id];
 		if (!btf_shallow_equal_struct(t, cand_type))
 			continue;
 
 		btf_dedup_clear_hypot_map(d);
-		eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id);
+		eq = btf_dedup_is_equiv(d, type_id, cand_id);
 		if (eq < 0)
 			return eq;
 		if (!eq)
 			continue;
-		new_id = cand_node->type_id;
+		new_id = cand_id;
 		btf_dedup_merge_hypot_map(d);
 		break;
 	}
@@ -2616,12 +2580,12 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
  */
 static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 {
-	struct btf_dedup_node *cand_node;
+	struct hashmap_entry *hash_entry;
+	__u32 new_id = type_id, cand_id;
 	struct btf_type *t, *cand;
 	/* if we don't find equivalent type, then we are representative type */
-	__u32 new_id = type_id;
 	int ref_type_id;
-	__u32 h;
+	long h;
 
 	if (d->map[type_id] == BTF_IN_PROGRESS_ID)
 		return -ELOOP;
@@ -2644,10 +2608,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		t->type = ref_type_id;
 
 		h = btf_hash_common(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_common(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2667,10 +2632,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		info->index_type = ref_type_id;
 
 		h = btf_hash_array(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_array(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2698,10 +2664,11 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		}
 
 		h = btf_hash_fnproto(t);
-		for_each_dedup_cand(d, h, cand_node) {
-			cand = d->btf->types[cand_node->type_id];
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = d->btf->types[cand_id];
 			if (btf_equal_fnproto(t, cand)) {
-				new_id = cand_node->type_id;
+				new_id = cand_id;
 				break;
 			}
 		}
@@ -2728,7 +2695,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)
 		if (err < 0)
 			return err;
 	}
-	btf_dedup_table_free(d);
+	/* we won't need d->dedup_table anymore */
+	hashmap__free(d->dedup_table);
+	d->dedup_table = NULL;
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 351131b51c7a27daf0fbdce80b619b8d130374c6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:03 -0700
Subject: libbpf: add btf_dump API for BTF-to-C conversion

BTF contains enough type information to allow generating valid
compilable C header w/ correct layout of structs/unions and all the
typedef/enum definitions. This patch adds a new "object" - btf_dump to
facilitate dumping BTF as valid C. btf_dump__dump_type() is the main API
which takes care of dumping out (through user-provided printf-like
callback function) C definitions for given type ID and it's required
dependencies. This allows for not just dumping out entirety of BTF types,
but also selective filtering based on user-provided criterias w/ minimal
set of dependent types.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Build      |    4 +-
 tools/lib/bpf/btf.h      |   17 +
 tools/lib/bpf/btf_dump.c | 1336 ++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.map |    3 +
 4 files changed, 1359 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/bpf/btf_dump.c

(limited to 'tools')

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index dcf0d36598e0..e3962cfbc9a6 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1,3 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
+	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
+	    btf_dump.o
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index bded210df9e8..ba4ffa831aa4 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -4,6 +4,7 @@
 #ifndef __LIBBPF_BTF_H
 #define __LIBBPF_BTF_H
 
+#include <stdarg.h>
 #include <linux/types.h>
 
 #ifdef __cplusplus
@@ -102,6 +103,22 @@ struct btf_dedup_opts {
 LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 			  const struct btf_dedup_opts *opts);
 
+struct btf_dump;
+
+struct btf_dump_opts {
+	void *ctx;
+};
+
+typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
+
+LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
+					  const struct btf_ext *btf_ext,
+					  const struct btf_dump_opts *opts,
+					  btf_dump_printf_fn_t printf_fn);
+LIBBPF_API void btf_dump__free(struct btf_dump *d);
+
+LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
new file mode 100644
index 000000000000..4b22db77e2cc
--- /dev/null
+++ b/tools/lib/bpf/btf_dump.c
@@ -0,0 +1,1336 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C type converter.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "hashmap.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+#define max(x, y) ((x) < (y) ? (y) : (x))
+
+static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
+static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
+
+static const char *pfx(int lvl)
+{
+	return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl];
+}
+
+enum btf_dump_type_order_state {
+	NOT_ORDERED,
+	ORDERING,
+	ORDERED,
+};
+
+enum btf_dump_type_emit_state {
+	NOT_EMITTED,
+	EMITTING,
+	EMITTED,
+};
+
+/* per-type auxiliary state */
+struct btf_dump_type_aux_state {
+	/* topological sorting state */
+	enum btf_dump_type_order_state order_state: 2;
+	/* emitting state used to determine the need for forward declaration */
+	enum btf_dump_type_emit_state emit_state: 2;
+	/* whether forward declaration was already emitted */
+	__u8 fwd_emitted: 1;
+	/* whether unique non-duplicate name was already assigned */
+	__u8 name_resolved: 1;
+};
+
+struct btf_dump {
+	const struct btf *btf;
+	const struct btf_ext *btf_ext;
+	btf_dump_printf_fn_t printf_fn;
+	struct btf_dump_opts opts;
+
+	/* per-type auxiliary state */
+	struct btf_dump_type_aux_state *type_states;
+	/* per-type optional cached unique name, must be freed, if present */
+	const char **cached_names;
+
+	/* topo-sorted list of dependent type definitions */
+	__u32 *emit_queue;
+	int emit_queue_cap;
+	int emit_queue_cnt;
+
+	/*
+	 * stack of type declarations (e.g., chain of modifiers, arrays,
+	 * funcs, etc)
+	 */
+	__u32 *decl_stack;
+	int decl_stack_cap;
+	int decl_stack_cnt;
+
+	/* maps struct/union/enum name to a number of name occurrences */
+	struct hashmap *type_names;
+	/*
+	 * maps typedef identifiers and enum value names to a number of such
+	 * name occurrences
+	 */
+	struct hashmap *ident_names;
+};
+
+static size_t str_hash_fn(const void *key, void *ctx)
+{
+	const char *s = key;
+	size_t h = 0;
+
+	while (*s) {
+		h = h * 31 + *s;
+		s++;
+	}
+	return h;
+}
+
+static bool str_equal_fn(const void *a, const void *b, void *ctx)
+{
+	return strcmp(a, b) == 0;
+}
+
+static __u16 btf_kind_of(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info);
+}
+
+static __u16 btf_vlen_of(const struct btf_type *t)
+{
+	return BTF_INFO_VLEN(t->info);
+}
+
+static bool btf_kflag_of(const struct btf_type *t)
+{
+	return BTF_INFO_KFLAG(t->info);
+}
+
+static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
+{
+	return btf__name_by_offset(d->btf, name_off);
+}
+
+static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	d->printf_fn(d->opts.ctx, fmt, args);
+	va_end(args);
+}
+
+struct btf_dump *btf_dump__new(const struct btf *btf,
+			       const struct btf_ext *btf_ext,
+			       const struct btf_dump_opts *opts,
+			       btf_dump_printf_fn_t printf_fn)
+{
+	struct btf_dump *d;
+	int err;
+
+	d = calloc(1, sizeof(struct btf_dump));
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+	d->printf_fn = printf_fn;
+	d->opts.ctx = opts ? opts->ctx : NULL;
+
+	d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->type_names)) {
+		err = PTR_ERR(d->type_names);
+		d->type_names = NULL;
+		btf_dump__free(d);
+		return ERR_PTR(err);
+	}
+	d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->ident_names)) {
+		err = PTR_ERR(d->ident_names);
+		d->ident_names = NULL;
+		btf_dump__free(d);
+		return ERR_PTR(err);
+	}
+
+	return d;
+}
+
+void btf_dump__free(struct btf_dump *d)
+{
+	int i, cnt;
+
+	if (!d)
+		return;
+
+	free(d->type_states);
+	if (d->cached_names) {
+		/* any set cached name is owned by us and should be freed */
+		for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) {
+			if (d->cached_names[i])
+				free((void *)d->cached_names[i]);
+		}
+	}
+	free(d->cached_names);
+	free(d->emit_queue);
+	free(d->decl_stack);
+	hashmap__free(d->type_names);
+	hashmap__free(d->ident_names);
+
+	free(d);
+}
+
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
+
+/*
+ * Dump BTF type in a compilable C syntax, including all the necessary
+ * dependent types, necessary for compilation. If some of the dependent types
+ * were already emitted as part of previous btf_dump__dump_type() invocation
+ * for another type, they won't be emitted again. This API allows callers to
+ * filter out BTF types according to user-defined criterias and emitted only
+ * minimal subset of types, necessary to compile everything. Full struct/union
+ * definitions will still be emitted, even if the only usage is through
+ * pointer and could be satisfied with just a forward declaration.
+ *
+ * Dumping is done in two high-level passes:
+ *   1. Topologically sort type definitions to satisfy C rules of compilation.
+ *   2. Emit type definitions in C syntax.
+ *
+ * Returns 0 on success; <0, otherwise.
+ */
+int btf_dump__dump_type(struct btf_dump *d, __u32 id)
+{
+	int err, i;
+
+	if (id > btf__get_nr_types(d->btf))
+		return -EINVAL;
+
+	/* type states are lazily allocated, as they might not be needed */
+	if (!d->type_states) {
+		d->type_states = calloc(1 + btf__get_nr_types(d->btf),
+					sizeof(d->type_states[0]));
+		if (!d->type_states)
+			return -ENOMEM;
+		d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
+					 sizeof(d->cached_names[0]));
+		if (!d->cached_names)
+			return -ENOMEM;
+
+		/* VOID is special */
+		d->type_states[0].order_state = ORDERED;
+		d->type_states[0].emit_state = EMITTED;
+	}
+
+	d->emit_queue_cnt = 0;
+	err = btf_dump_order_type(d, id, false);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < d->emit_queue_cnt; i++)
+		btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
+
+	return 0;
+}
+
+static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
+{
+	__u32 *new_queue;
+	size_t new_cap;
+
+	if (d->emit_queue_cnt >= d->emit_queue_cap) {
+		new_cap = max(16, d->emit_queue_cap * 3 / 2);
+		new_queue = realloc(d->emit_queue,
+				    new_cap * sizeof(new_queue[0]));
+		if (!new_queue)
+			return -ENOMEM;
+		d->emit_queue = new_queue;
+		d->emit_queue_cap = new_cap;
+	}
+
+	d->emit_queue[d->emit_queue_cnt++] = id;
+	return 0;
+}
+
+/*
+ * Determine order of emitting dependent types and specified type to satisfy
+ * C compilation rules.  This is done through topological sorting with an
+ * additional complication which comes from C rules. The main idea for C is
+ * that if some type is "embedded" into a struct/union, it's size needs to be
+ * known at the time of definition of containing type. E.g., for:
+ *
+ *	struct A {};
+ *	struct B { struct A x; }
+ *
+ * struct A *HAS* to be defined before struct B, because it's "embedded",
+ * i.e., it is part of struct B layout. But in the following case:
+ *
+ *	struct A;
+ *	struct B { struct A *x; }
+ *	struct A {};
+ *
+ * it's enough to just have a forward declaration of struct A at the time of
+ * struct B definition, as struct B has a pointer to struct A, so the size of
+ * field x is known without knowing struct A size: it's sizeof(void *).
+ *
+ * Unfortunately, there are some trickier cases we need to handle, e.g.:
+ *
+ *	struct A {}; // if this was forward-declaration: compilation error
+ *	struct B {
+ *		struct { // anonymous struct
+ *			struct A y;
+ *		} *x;
+ *	};
+ *
+ * In this case, struct B's field x is a pointer, so it's size is known
+ * regardless of the size of (anonymous) struct it points to. But because this
+ * struct is anonymous and thus defined inline inside struct B, *and* it
+ * embeds struct A, compiler requires full definition of struct A to be known
+ * before struct B can be defined. This creates a transitive dependency
+ * between struct A and struct B. If struct A was forward-declared before
+ * struct B definition and fully defined after struct B definition, that would
+ * trigger compilation error.
+ *
+ * All this means that while we are doing topological sorting on BTF type
+ * graph, we need to determine relationships between different types (graph
+ * nodes):
+ *   - weak link (relationship) between X and Y, if Y *CAN* be
+ *   forward-declared at the point of X definition;
+ *   - strong link, if Y *HAS* to be fully-defined before X can be defined.
+ *
+ * The rule is as follows. Given a chain of BTF types from X to Y, if there is
+ * BTF_KIND_PTR type in the chain and at least one non-anonymous type
+ * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
+ * Weak/strong relationship is determined recursively during DFS traversal and
+ * is returned as a result from btf_dump_order_type().
+ *
+ * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
+ * but it is not guaranteeing that no extraneous forward declarations will be
+ * emitted.
+ *
+ * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
+ * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
+ * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
+ * entire graph path, so depending where from one came to that BTF type, it
+ * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
+ * once they are processed, there is no need to do it again, so they are
+ * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
+ * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
+ * in any case, once those are processed, no need to do it again, as the
+ * result won't change.
+ *
+ * Returns:
+ *   - 1, if type is part of strong link (so there is strong topological
+ *   ordering requirements);
+ *   - 0, if type is part of weak link (so can be satisfied through forward
+ *   declaration);
+ *   - <0, on error (e.g., unsatisfiable type loop detected).
+ */
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
+{
+	/*
+	 * Order state is used to detect strong link cycles, but only for BTF
+	 * kinds that are or could be an independent definition (i.e.,
+	 * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays,
+	 * func_protos, modifiers are just means to get to these definitions.
+	 * Int/void don't need definitions, they are assumed to be always
+	 * properly defined.  We also ignore datasec, var, and funcs for now.
+	 * So for all non-defining kinds, we never even set ordering state,
+	 * for defining kinds we set ORDERING and subsequently ORDERED if it
+	 * forms a strong link.
+	 */
+	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+	const struct btf_type *t;
+	__u16 kind, vlen;
+	int err, i;
+
+	/* return true, letting typedefs know that it's ok to be emitted */
+	if (tstate->order_state == ORDERED)
+		return 1;
+
+	t = btf__type_by_id(d->btf, id);
+	kind = btf_kind_of(t);
+
+	if (tstate->order_state == ORDERING) {
+		/* type loop, but resolvable through fwd declaration */
+		if ((kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) &&
+		    through_ptr && t->name_off != 0)
+			return 0;
+		pr_warning("unsatisfiable type cycle, id:[%u]\n", id);
+		return -ELOOP;
+	}
+
+	switch (kind) {
+	case BTF_KIND_INT:
+		tstate->order_state = ORDERED;
+		return 0;
+
+	case BTF_KIND_PTR:
+		err = btf_dump_order_type(d, t->type, true);
+		tstate->order_state = ORDERED;
+		return err;
+
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *a = (void *)(t + 1);
+
+		return btf_dump_order_type(d, a->type, through_ptr);
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = (void *)(t + 1);
+		/*
+		 * struct/union is part of strong link, only if it's embedded
+		 * (so no ptr in a path) or it's anonymous (so has to be
+		 * defined inline, even if declared through ptr)
+		 */
+		if (through_ptr && t->name_off != 0)
+			return 0;
+
+		tstate->order_state = ORDERING;
+
+		vlen = btf_vlen_of(t);
+		for (i = 0; i < vlen; i++, m++) {
+			err = btf_dump_order_type(d, m->type, false);
+			if (err < 0)
+				return err;
+		}
+
+		if (t->name_off != 0) {
+			err = btf_dump_add_emit_queue_id(d, id);
+			if (err < 0)
+				return err;
+		}
+
+		tstate->order_state = ORDERED;
+		return 1;
+	}
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+		if (t->name_off != 0) {
+			err = btf_dump_add_emit_queue_id(d, id);
+			if (err)
+				return err;
+		}
+		tstate->order_state = ORDERED;
+		return 1;
+
+	case BTF_KIND_TYPEDEF: {
+		int is_strong;
+
+		is_strong = btf_dump_order_type(d, t->type, through_ptr);
+		if (is_strong < 0)
+			return is_strong;
+
+		/* typedef is similar to struct/union w.r.t. fwd-decls */
+		if (through_ptr && !is_strong)
+			return 0;
+
+		/* typedef is always a named definition */
+		err = btf_dump_add_emit_queue_id(d, id);
+		if (err)
+			return err;
+
+		d->type_states[id].order_state = ORDERED;
+		return 1;
+	}
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf_dump_order_type(d, t->type, through_ptr);
+
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = (void *)(t + 1);
+		bool is_strong;
+
+		err = btf_dump_order_type(d, t->type, through_ptr);
+		if (err < 0)
+			return err;
+		is_strong = err > 0;
+
+		vlen = btf_vlen_of(t);
+		for (i = 0; i < vlen; i++, p++) {
+			err = btf_dump_order_type(d, p->type, through_ptr);
+			if (err < 0)
+				return err;
+			if (err > 0)
+				is_strong = true;
+		}
+		return is_strong;
+	}
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+	case BTF_KIND_DATASEC:
+		d->type_states[id].order_state = ORDERED;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t);
+static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t);
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+				  const struct btf_type *t);
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+				      const struct btf_type *t, int lvl);
+
+/* a local view into a shared stack */
+struct id_stack {
+	const __u32 *ids;
+	int cnt;
+};
+
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+				    const char *fname, int lvl);
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+				     struct id_stack *decl_stack,
+				     const char *fname, int lvl);
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id);
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+				 const char *orig_name);
+
+static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+
+	/* __builtin_va_list is a compiler built-in, which causes compilation
+	 * errors, when compiling w/ different compiler, then used to compile
+	 * original code (e.g., GCC to compile kernel, Clang to use generated
+	 * C header from BTF). As it is built-in, it should be already defined
+	 * properly internally in compiler.
+	 */
+	if (t->name_off == 0)
+		return false;
+	return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
+}
+
+/*
+ * Emit C-syntax definitions of types from chains of BTF types.
+ *
+ * High-level handling of determining necessary forward declarations are handled
+ * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
+ * declarations/definitions in C syntax  are handled by a combo of
+ * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
+ * corresponding btf_dump_emit_*_{def,fwd}() functions.
+ *
+ * We also keep track of "containing struct/union type ID" to determine when
+ * we reference it from inside and thus can avoid emitting unnecessary forward
+ * declaration.
+ *
+ * This algorithm is designed in such a way, that even if some error occurs
+ * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
+ * that doesn't comply to C rules completely), algorithm will try to proceed
+ * and produce as much meaningful output as possible.
+ */
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
+{
+	struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+	bool top_level_def = cont_id == 0;
+	const struct btf_type *t;
+	__u16 kind;
+
+	if (tstate->emit_state == EMITTED)
+		return;
+
+	t = btf__type_by_id(d->btf, id);
+	kind = btf_kind_of(t);
+
+	if (top_level_def && t->name_off == 0) {
+		pr_warning("unexpected nameless definition, id:[%u]\n", id);
+		return;
+	}
+
+	if (tstate->emit_state == EMITTING) {
+		if (tstate->fwd_emitted)
+			return;
+
+		switch (kind) {
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			/*
+			 * if we are referencing a struct/union that we are
+			 * part of - then no need for fwd declaration
+			 */
+			if (id == cont_id)
+				return;
+			if (t->name_off == 0) {
+				pr_warning("anonymous struct/union loop, id:[%u]\n",
+					   id);
+				return;
+			}
+			btf_dump_emit_struct_fwd(d, id, t);
+			btf_dump_printf(d, ";\n\n");
+			tstate->fwd_emitted = 1;
+			break;
+		case BTF_KIND_TYPEDEF:
+			/*
+			 * for typedef fwd_emitted means typedef definition
+			 * was emitted, but it can be used only for "weak"
+			 * references through pointer only, not for embedding
+			 */
+			if (!btf_dump_is_blacklisted(d, id)) {
+				btf_dump_emit_typedef_def(d, id, t, 0);
+				btf_dump_printf(d, ";\n\n");
+			};
+			tstate->fwd_emitted = 1;
+			break;
+		default:
+			break;
+		}
+
+		return;
+	}
+
+	switch (kind) {
+	case BTF_KIND_INT:
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_ENUM:
+		if (top_level_def) {
+			btf_dump_emit_enum_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+		}
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_PTR:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		btf_dump_emit_type(d, t->type, cont_id);
+		break;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *a = (void *)(t + 1);
+
+		btf_dump_emit_type(d, a->type, cont_id);
+		break;
+	}
+	case BTF_KIND_FWD:
+		btf_dump_emit_fwd_def(d, id, t);
+		btf_dump_printf(d, ";\n\n");
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_TYPEDEF:
+		tstate->emit_state = EMITTING;
+		btf_dump_emit_type(d, t->type, id);
+		/*
+		 * typedef can server as both definition and forward
+		 * declaration; at this stage someone depends on
+		 * typedef as a forward declaration (refers to it
+		 * through pointer), so unless we already did it,
+		 * emit typedef as a forward declaration
+		 */
+		if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
+			btf_dump_emit_typedef_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+		}
+		tstate->emit_state = EMITTED;
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		tstate->emit_state = EMITTING;
+		/* if it's a top-level struct/union definition or struct/union
+		 * is anonymous, then in C we'll be emitting all fields and
+		 * their types (as opposed to just `struct X`), so we need to
+		 * make sure that all types, referenced from struct/union
+		 * members have necessary forward-declarations, where
+		 * applicable
+		 */
+		if (top_level_def || t->name_off == 0) {
+			const struct btf_member *m = (void *)(t + 1);
+			__u16 vlen = btf_vlen_of(t);
+			int i, new_cont_id;
+
+			new_cont_id = t->name_off == 0 ? cont_id : id;
+			for (i = 0; i < vlen; i++, m++)
+				btf_dump_emit_type(d, m->type, new_cont_id);
+		} else if (!tstate->fwd_emitted && id != cont_id) {
+			btf_dump_emit_struct_fwd(d, id, t);
+			btf_dump_printf(d, ";\n\n");
+			tstate->fwd_emitted = 1;
+		}
+
+		if (top_level_def) {
+			btf_dump_emit_struct_def(d, id, t, 0);
+			btf_dump_printf(d, ";\n\n");
+			tstate->emit_state = EMITTED;
+		} else {
+			tstate->emit_state = NOT_EMITTED;
+		}
+		break;
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p = (void *)(t + 1);
+		__u16 vlen = btf_vlen_of(t);
+		int i;
+
+		btf_dump_emit_type(d, t->type, cont_id);
+		for (i = 0; i < vlen; i++, p++)
+			btf_dump_emit_type(d, p->type, cont_id);
+
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+static int btf_align_of(const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+	__u16 kind = btf_kind_of(t);
+
+	switch (kind) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+		return min(sizeof(void *), t->size);
+	case BTF_KIND_PTR:
+		return sizeof(void *);
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf_align_of(btf, t->type);
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *a = (void *)(t + 1);
+
+		return btf_align_of(btf, a->type);
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = (void *)(t + 1);
+		__u16 vlen = btf_vlen_of(t);
+		int i, align = 1;
+
+		for (i = 0; i < vlen; i++, m++)
+			align = max(align, btf_align_of(btf, m->type));
+
+		return align;
+	}
+	default:
+		pr_warning("unsupported BTF_KIND:%u\n", btf_kind_of(t));
+		return 1;
+	}
+}
+
+static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+				 const struct btf_type *t)
+{
+	const struct btf_member *m;
+	int align, i, bit_sz;
+	__u16 vlen;
+	bool kflag;
+
+	align = btf_align_of(btf, id);
+	/* size of a non-packed struct has to be a multiple of its alignment*/
+	if (t->size % align)
+		return true;
+
+	m = (void *)(t + 1);
+	kflag = btf_kflag_of(t);
+	vlen = btf_vlen_of(t);
+	/* all non-bitfield fields have to be naturally aligned */
+	for (i = 0; i < vlen; i++, m++) {
+		align = btf_align_of(btf, m->type);
+		bit_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
+		if (bit_sz == 0 && m->offset % (8 * align) != 0)
+			return true;
+	}
+
+	/*
+	 * if original struct was marked as packed, but its layout is
+	 * naturally aligned, we'll detect that it's not packed
+	 */
+	return false;
+}
+
+static int chip_away_bits(int total, int at_most)
+{
+	return total % at_most ? : at_most;
+}
+
+static void btf_dump_emit_bit_padding(const struct btf_dump *d,
+				      int cur_off, int m_off, int m_bit_sz,
+				      int align, int lvl)
+{
+	int off_diff = m_off - cur_off;
+	int ptr_bits = sizeof(void *) * 8;
+
+	if (off_diff <= 0)
+		/* no gap */
+		return;
+	if (m_bit_sz == 0 && off_diff < align * 8)
+		/* natural padding will take care of a gap */
+		return;
+
+	while (off_diff > 0) {
+		const char *pad_type;
+		int pad_bits;
+
+		if (ptr_bits > 32 && off_diff > 32) {
+			pad_type = "long";
+			pad_bits = chip_away_bits(off_diff, ptr_bits);
+		} else if (off_diff > 16) {
+			pad_type = "int";
+			pad_bits = chip_away_bits(off_diff, 32);
+		} else if (off_diff > 8) {
+			pad_type = "short";
+			pad_bits = chip_away_bits(off_diff, 16);
+		} else {
+			pad_type = "char";
+			pad_bits = chip_away_bits(off_diff, 8);
+		}
+		btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+		off_diff -= pad_bits;
+	}
+}
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t)
+{
+	btf_dump_printf(d, "%s %s",
+			btf_kind_of(t) == BTF_KIND_STRUCT ? "struct" : "union",
+			btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_struct_def(struct btf_dump *d,
+				     __u32 id,
+				     const struct btf_type *t,
+				     int lvl)
+{
+	const struct btf_member *m = (void *)(t + 1);
+	bool kflag = btf_kflag_of(t), is_struct;
+	int align, i, packed, off = 0;
+	__u16 vlen = btf_vlen_of(t);
+
+	is_struct = btf_kind_of(t) == BTF_KIND_STRUCT;
+	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
+	align = packed ? 1 : btf_align_of(d->btf, id);
+
+	btf_dump_printf(d, "%s%s%s {",
+			is_struct ? "struct" : "union",
+			t->name_off ? " " : "",
+			btf_dump_type_name(d, id));
+
+	for (i = 0; i < vlen; i++, m++) {
+		const char *fname;
+		int m_off, m_sz;
+
+		fname = btf_name_of(d, m->name_off);
+		m_sz = kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
+		m_off = kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset;
+		align = packed ? 1 : btf_align_of(d->btf, m->type);
+
+		btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
+		btf_dump_printf(d, "\n%s", pfx(lvl + 1));
+		btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
+
+		if (m_sz) {
+			btf_dump_printf(d, ": %d", m_sz);
+			off = m_off + m_sz;
+		} else {
+			m_sz = max(0, btf__resolve_size(d->btf, m->type));
+			off = m_off + m_sz * 8;
+		}
+		btf_dump_printf(d, ";");
+	}
+
+	if (vlen)
+		btf_dump_printf(d, "\n");
+	btf_dump_printf(d, "%s}", pfx(lvl));
+	if (packed)
+		btf_dump_printf(d, " __attribute__((packed))");
+}
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t)
+{
+	btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t,
+				   int lvl)
+{
+	const struct btf_enum *v = (void *)(t+1);
+	__u16 vlen = btf_vlen_of(t);
+	const char *name;
+	size_t dup_cnt;
+	int i;
+
+	btf_dump_printf(d, "enum%s%s",
+			t->name_off ? " " : "",
+			btf_dump_type_name(d, id));
+
+	if (vlen) {
+		btf_dump_printf(d, " {");
+		for (i = 0; i < vlen; i++, v++) {
+			name = btf_name_of(d, v->name_off);
+			/* enumerators share namespace with typedef idents */
+			dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+			if (dup_cnt > 1) {
+				btf_dump_printf(d, "\n%s%s___%zu = %d,",
+						pfx(lvl + 1), name, dup_cnt,
+						(__s32)v->val);
+			} else {
+				btf_dump_printf(d, "\n%s%s = %d,",
+						pfx(lvl + 1), name,
+						(__s32)v->val);
+			}
+		}
+		btf_dump_printf(d, "\n%s}", pfx(lvl));
+	}
+}
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+				  const struct btf_type *t)
+{
+	const char *name = btf_dump_type_name(d, id);
+
+	if (btf_kflag_of(t))
+		btf_dump_printf(d, "union %s", name);
+	else
+		btf_dump_printf(d, "struct %s", name);
+}
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t, int lvl)
+{
+	const char *name = btf_dump_ident_name(d, id);
+
+	btf_dump_printf(d, "typedef ");
+	btf_dump_emit_type_decl(d, t->type, name, lvl);
+}
+
+static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
+{
+	__u32 *new_stack;
+	size_t new_cap;
+
+	if (d->decl_stack_cnt >= d->decl_stack_cap) {
+		new_cap = max(16, d->decl_stack_cap * 3 / 2);
+		new_stack = realloc(d->decl_stack,
+				    new_cap * sizeof(new_stack[0]));
+		if (!new_stack)
+			return -ENOMEM;
+		d->decl_stack = new_stack;
+		d->decl_stack_cap = new_cap;
+	}
+
+	d->decl_stack[d->decl_stack_cnt++] = id;
+
+	return 0;
+}
+
+/*
+ * Emit type declaration (e.g., field type declaration in a struct or argument
+ * declaration in function prototype) in correct C syntax.
+ *
+ * For most types it's trivial, but there are few quirky type declaration
+ * cases worth mentioning:
+ *   - function prototypes (especially nesting of function prototypes);
+ *   - arrays;
+ *   - const/volatile/restrict for pointers vs other types.
+ *
+ * For a good discussion of *PARSING* C syntax (as a human), see
+ * Peter van der Linden's "Expert C Programming: Deep C Secrets",
+ * Ch.3 "Unscrambling Declarations in C".
+ *
+ * It won't help with BTF to C conversion much, though, as it's an opposite
+ * problem. So we came up with this algorithm in reverse to van der Linden's
+ * parsing algorithm. It goes from structured BTF representation of type
+ * declaration to a valid compilable C syntax.
+ *
+ * For instance, consider this C typedef:
+ *	typedef const int * const * arr[10] arr_t;
+ * It will be represented in BTF with this chain of BTF types:
+ *	[typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int]
+ *
+ * Notice how [const] modifier always goes before type it modifies in BTF type
+ * graph, but in C syntax, const/volatile/restrict modifiers are written to
+ * the right of pointers, but to the left of other types. There are also other
+ * quirks, like function pointers, arrays of them, functions returning other
+ * functions, etc.
+ *
+ * We handle that by pushing all the types to a stack, until we hit "terminal"
+ * type (int/enum/struct/union/fwd). Then depending on the kind of a type on
+ * top of a stack, modifiers are handled differently. Array/function pointers
+ * have also wildly different syntax and how nesting of them are done. See
+ * code for authoritative definition.
+ *
+ * To avoid allocating new stack for each independent chain of BTF types, we
+ * share one bigger stack, with each chain working only on its own local view
+ * of a stack frame. Some care is required to "pop" stack frames after
+ * processing type declaration chain.
+ */
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+				    const char *fname, int lvl)
+{
+	struct id_stack decl_stack;
+	const struct btf_type *t;
+	int err, stack_start;
+	__u16 kind;
+
+	stack_start = d->decl_stack_cnt;
+	for (;;) {
+		err = btf_dump_push_decl_stack_id(d, id);
+		if (err < 0) {
+			/*
+			 * if we don't have enough memory for entire type decl
+			 * chain, restore stack, emit warning, and try to
+			 * proceed nevertheless
+			 */
+			pr_warning("not enough memory for decl stack:%d", err);
+			d->decl_stack_cnt = stack_start;
+			return;
+		}
+
+		/* VOID */
+		if (id == 0)
+			break;
+
+		t = btf__type_by_id(d->btf, id);
+		kind = btf_kind_of(t);
+		switch (kind) {
+		case BTF_KIND_PTR:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_FUNC_PROTO:
+			id = t->type;
+			break;
+		case BTF_KIND_ARRAY: {
+			const struct btf_array *a = (void *)(t + 1);
+
+			id = a->type;
+			break;
+		}
+		case BTF_KIND_INT:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_FWD:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_TYPEDEF:
+			goto done;
+		default:
+			pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
+				   kind, id);
+			goto done;
+		}
+	}
+done:
+	/*
+	 * We might be inside a chain of declarations (e.g., array of function
+	 * pointers returning anonymous (so inlined) structs, having another
+	 * array field). Each of those needs its own "stack frame" to handle
+	 * emitting of declarations. Those stack frames are non-overlapping
+	 * portions of shared btf_dump->decl_stack. To make it a bit nicer to
+	 * handle this set of nested stacks, we create a view corresponding to
+	 * our own "stack frame" and work with it as an independent stack.
+	 * We'll need to clean up after emit_type_chain() returns, though.
+	 */
+	decl_stack.ids = d->decl_stack + stack_start;
+	decl_stack.cnt = d->decl_stack_cnt - stack_start;
+	btf_dump_emit_type_chain(d, &decl_stack, fname, lvl);
+	/*
+	 * emit_type_chain() guarantees that it will pop its entire decl_stack
+	 * frame before returning. But it works with a read-only view into
+	 * decl_stack, so it doesn't actually pop anything from the
+	 * perspective of shared btf_dump->decl_stack, per se. We need to
+	 * reset decl_stack state to how it was before us to avoid it growing
+	 * all the time.
+	 */
+	d->decl_stack_cnt = stack_start;
+}
+
+static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
+{
+	const struct btf_type *t;
+	__u32 id;
+
+	while (decl_stack->cnt) {
+		id = decl_stack->ids[decl_stack->cnt - 1];
+		t = btf__type_by_id(d->btf, id);
+
+		switch (btf_kind_of(t)) {
+		case BTF_KIND_VOLATILE:
+			btf_dump_printf(d, "volatile ");
+			break;
+		case BTF_KIND_CONST:
+			btf_dump_printf(d, "const ");
+			break;
+		case BTF_KIND_RESTRICT:
+			btf_dump_printf(d, "restrict ");
+			break;
+		default:
+			return;
+		}
+		decl_stack->cnt--;
+	}
+}
+
+static bool btf_is_mod_kind(const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+
+	switch (btf_kind_of(t)) {
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void btf_dump_emit_name(const struct btf_dump *d,
+			       const char *name, bool last_was_ptr)
+{
+	bool separate = name[0] && !last_was_ptr;
+
+	btf_dump_printf(d, "%s%s", separate ? " " : "", name);
+}
+
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+				     struct id_stack *decls,
+				     const char *fname, int lvl)
+{
+	/*
+	 * last_was_ptr is used to determine if we need to separate pointer
+	 * asterisk (*) from previous part of type signature with space, so
+	 * that we get `int ***`, instead of `int * * *`. We default to true
+	 * for cases where we have single pointer in a chain. E.g., in ptr ->
+	 * func_proto case. func_proto will start a new emit_type_chain call
+	 * with just ptr, which should be emitted as (*) or (*<fname>), so we
+	 * don't want to prepend space for that last pointer.
+	 */
+	bool last_was_ptr = true;
+	const struct btf_type *t;
+	const char *name;
+	__u16 kind;
+	__u32 id;
+
+	while (decls->cnt) {
+		id = decls->ids[--decls->cnt];
+		if (id == 0) {
+			/* VOID is a special snowflake */
+			btf_dump_emit_mods(d, decls);
+			btf_dump_printf(d, "void");
+			last_was_ptr = false;
+			continue;
+		}
+
+		t = btf__type_by_id(d->btf, id);
+		kind = btf_kind_of(t);
+
+		switch (kind) {
+		case BTF_KIND_INT:
+			btf_dump_emit_mods(d, decls);
+			name = btf_name_of(d, t->name_off);
+			btf_dump_printf(d, "%s", name);
+			break;
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+			btf_dump_emit_mods(d, decls);
+			/* inline anonymous struct/union */
+			if (t->name_off == 0)
+				btf_dump_emit_struct_def(d, id, t, lvl);
+			else
+				btf_dump_emit_struct_fwd(d, id, t);
+			break;
+		case BTF_KIND_ENUM:
+			btf_dump_emit_mods(d, decls);
+			/* inline anonymous enum */
+			if (t->name_off == 0)
+				btf_dump_emit_enum_def(d, id, t, lvl);
+			else
+				btf_dump_emit_enum_fwd(d, id, t);
+			break;
+		case BTF_KIND_FWD:
+			btf_dump_emit_mods(d, decls);
+			btf_dump_emit_fwd_def(d, id, t);
+			break;
+		case BTF_KIND_TYPEDEF:
+			btf_dump_emit_mods(d, decls);
+			btf_dump_printf(d, "%s", btf_dump_ident_name(d, id));
+			break;
+		case BTF_KIND_PTR:
+			btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *");
+			break;
+		case BTF_KIND_VOLATILE:
+			btf_dump_printf(d, " volatile");
+			break;
+		case BTF_KIND_CONST:
+			btf_dump_printf(d, " const");
+			break;
+		case BTF_KIND_RESTRICT:
+			btf_dump_printf(d, " restrict");
+			break;
+		case BTF_KIND_ARRAY: {
+			const struct btf_array *a = (void *)(t + 1);
+			const struct btf_type *next_t;
+			__u32 next_id;
+			bool multidim;
+			/*
+			 * GCC has a bug
+			 * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354)
+			 * which causes it to emit extra const/volatile
+			 * modifiers for an array, if array's element type has
+			 * const/volatile modifiers. Clang doesn't do that.
+			 * In general, it doesn't seem very meaningful to have
+			 * a const/volatile modifier for array, so we are
+			 * going to silently skip them here.
+			 */
+			while (decls->cnt) {
+				next_id = decls->ids[decls->cnt - 1];
+				if (btf_is_mod_kind(d->btf, next_id))
+					decls->cnt--;
+				else
+					break;
+			}
+
+			if (decls->cnt == 0) {
+				btf_dump_emit_name(d, fname, last_was_ptr);
+				btf_dump_printf(d, "[%u]", a->nelems);
+				return;
+			}
+
+			next_t = btf__type_by_id(d->btf, next_id);
+			multidim = btf_kind_of(next_t) == BTF_KIND_ARRAY;
+			/* we need space if we have named non-pointer */
+			if (fname[0] && !last_was_ptr)
+				btf_dump_printf(d, " ");
+			/* no parentheses for multi-dimensional array */
+			if (!multidim)
+				btf_dump_printf(d, "(");
+			btf_dump_emit_type_chain(d, decls, fname, lvl);
+			if (!multidim)
+				btf_dump_printf(d, ")");
+			btf_dump_printf(d, "[%u]", a->nelems);
+			return;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			const struct btf_param *p = (void *)(t + 1);
+			__u16 vlen = btf_vlen_of(t);
+			int i;
+
+			btf_dump_emit_mods(d, decls);
+			if (decls->cnt) {
+				btf_dump_printf(d, " (");
+				btf_dump_emit_type_chain(d, decls, fname, lvl);
+				btf_dump_printf(d, ")");
+			} else {
+				btf_dump_emit_name(d, fname, last_was_ptr);
+			}
+			btf_dump_printf(d, "(");
+			/*
+			 * Clang for BPF target generates func_proto with no
+			 * args as a func_proto with a single void arg (e.g.,
+			 * `int (*f)(void)` vs just `int (*f)()`). We are
+			 * going to pretend there are no args for such case.
+			 */
+			if (vlen == 1 && p->type == 0) {
+				btf_dump_printf(d, ")");
+				return;
+			}
+
+			for (i = 0; i < vlen; i++, p++) {
+				if (i > 0)
+					btf_dump_printf(d, ", ");
+
+				/* last arg of type void is vararg */
+				if (i == vlen - 1 && p->type == 0) {
+					btf_dump_printf(d, "...");
+					break;
+				}
+
+				name = btf_name_of(d, p->name_off);
+				btf_dump_emit_type_decl(d, p->type, name, lvl);
+			}
+
+			btf_dump_printf(d, ")");
+			return;
+		}
+		default:
+			pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n",
+				   kind, id);
+			return;
+		}
+
+		last_was_ptr = kind == BTF_KIND_PTR;
+	}
+
+	btf_dump_emit_name(d, fname, last_was_ptr);
+}
+
+/* return number of duplicates (occurrences) of a given name */
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+				 const char *orig_name)
+{
+	size_t dup_cnt = 0;
+
+	hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+	dup_cnt++;
+	hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
+
+	return dup_cnt;
+}
+
+static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
+					 struct hashmap *name_map)
+{
+	struct btf_dump_type_aux_state *s = &d->type_states[id];
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+	const char *orig_name = btf_name_of(d, t->name_off);
+	const char **cached_name = &d->cached_names[id];
+	size_t dup_cnt;
+
+	if (t->name_off == 0)
+		return "";
+
+	if (s->name_resolved)
+		return *cached_name ? *cached_name : orig_name;
+
+	dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
+	if (dup_cnt > 1) {
+		const size_t max_len = 256;
+		char new_name[max_len];
+
+		snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt);
+		*cached_name = strdup(new_name);
+	}
+
+	s->name_resolved = 1;
+	return *cached_name ? *cached_name : orig_name;
+}
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id)
+{
+	return btf_dump_resolve_name(d, id, d->type_names);
+}
+
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
+{
+	return btf_dump_resolve_name(d, id, d->ident_names);
+}
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 6ea5ce19b9e0..8bf51d0a6072 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -167,5 +167,8 @@ LIBBPF_0.0.3 {
 
 LIBBPF_0.0.4 {
 	global:
+		btf_dump__dump_type;
+		btf_dump__free;
+		btf_dump__new;
 		btf__parse_elf;
 } LIBBPF_0.0.3;
-- 
cgit v1.2.3-59-g8ed1b


From 2d2a3ad872f884d618f11e1a2028cff862503dcd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:04 -0700
Subject: selftests/bpf: add btf_dump BTF-to-C conversion tests

Add new test_btf_dump set of tests, validating BTF-to-C conversion
correctness. Tests rely on clang to generate BTF from provided C test
cases.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore             |   1 +
 tools/testing/selftests/bpf/Makefile               |   3 +-
 .../bpf/progs/btf_dump_test_case_bitfields.c       |  92 +++++++++
 .../bpf/progs/btf_dump_test_case_multidim.c        |  35 ++++
 .../bpf/progs/btf_dump_test_case_namespacing.c     |  73 +++++++
 .../bpf/progs/btf_dump_test_case_ordering.c        |  63 ++++++
 .../bpf/progs/btf_dump_test_case_packing.c         |  75 +++++++
 .../bpf/progs/btf_dump_test_case_padding.c         | 111 ++++++++++
 .../bpf/progs/btf_dump_test_case_syntax.c          | 229 +++++++++++++++++++++
 tools/testing/selftests/bpf/test_btf_dump.c        | 143 +++++++++++++
 10 files changed, 824 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
 create mode 100644 tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
 create mode 100644 tools/testing/selftests/bpf/test_btf_dump.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 138b6c063916..b3da2ffdc158 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -36,3 +36,4 @@ alu32
 libbpf.pc
 libbpf.so.*
 test_hashmap
+test_btf_dump
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index ddae06498a00..cd23758e8b7a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
-	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap
+	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
+	test_btf_dump
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
new file mode 100644
index 000000000000..8f44767a75fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper tests for bitfield.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdbool.h>
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct bitfields_only_mixed_types {
+ *	int a: 3;
+ *	long int b: 2;
+ *	_Bool c: 1;
+ *	enum {
+ *		A = 0,
+ *		B = 1,
+ *	} d: 1;
+ *	short e: 5;
+ *	int: 20;
+ *	unsigned int f: 30;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct bitfields_only_mixed_types {
+	int a: 3;
+	long int b: 2;
+	bool c: 1; /* it's really a _Bool type */
+	enum {
+		A, /* A = 0, dumper is very explicit */
+		B, /* B = 1, same */
+	} d: 1;
+	short e: 5;
+	/* 20-bit padding here */
+	unsigned f: 30; /* this gets aligned on 4-byte boundary */
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct bitfield_mixed_with_others {
+ *	char: 4;
+ *	int a: 4;
+ *	short b;
+ *	long int c;
+ *	long int d: 8;
+ *	int e;
+ *	int f;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+struct bitfield_mixed_with_others {
+	long: 4; /* char is enough as a backing field */
+	int a: 4;
+	/* 8-bit implicit padding */
+	short b; /* combined with previous bitfield */
+	/* 4 more bytes of implicit padding */
+	long c;
+	long d: 8;
+	/* 24 bits implicit padding */
+	int e; /* combined with previous bitfield */
+	int f;
+	/* 4 bytes of padding */
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct bitfield_flushed {
+ *	int a: 4;
+ *	long: 60;
+ *	long int b: 16;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+struct bitfield_flushed {
+	int a: 4;
+	long: 0; /* flush until next natural alignment boundary */
+	long b: 16;
+};
+
+int f(struct {
+	struct bitfields_only_mixed_types _1;
+	struct bitfield_mixed_with_others _2;
+	struct bitfield_flushed _3;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
new file mode 100644
index 000000000000..ba97165bdb28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test for multi-dimensional array output.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+typedef int arr_t[2];
+
+typedef int multiarr_t[3][4][5];
+
+typedef int *ptr_arr_t[6];
+
+typedef int *ptr_multiarr_t[7][8][9][10];
+
+typedef int * (*fn_ptr_arr_t[11])();
+
+typedef int * (*fn_ptr_multiarr_t[12][13])();
+
+struct root_struct {
+	arr_t _1;
+	multiarr_t _2;
+	ptr_arr_t _3;
+	ptr_multiarr_t _4;
+	fn_ptr_arr_t _5;
+	fn_ptr_multiarr_t _6;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct root_struct *s)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c
new file mode 100644
index 000000000000..92a4ad428710
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_namespacing.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test validating no name versioning happens between
+ * independent C namespaces (struct/union/enum vs typedef/enum values).
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct S {
+	int S;
+	int U;
+};
+
+typedef struct S S;
+
+union U {
+	int S;
+	int U;
+};
+
+typedef union U U;
+
+enum E {
+	V = 0,
+};
+
+typedef enum E E;
+
+struct A {};
+
+union B {};
+
+enum C {
+	A = 1,
+	B = 2,
+	C = 3,
+};
+
+struct X {};
+
+union Y {};
+
+enum Z;
+
+typedef int X;
+
+typedef int Y;
+
+typedef int Z;
+
+/*------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct {
+	struct S _1;
+	S _2;
+	union U _3;
+	U _4;
+	enum E _5;
+	E _6;
+	struct A a;
+	union B b;
+	enum C c;
+	struct X x;
+	union Y y;
+	enum Z *z;
+	X xx;
+	Y yy;
+	Z zz;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c
new file mode 100644
index 000000000000..7c95702ee4cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_ordering.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test for topological sorting of dependent structs.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct s1 {};
+
+struct s3;
+
+struct s4;
+
+struct s2 {
+	struct s2 *s2;
+	struct s3 *s3;
+	struct s4 *s4;
+};
+
+struct s3 {
+	struct s1 s1;
+	struct s2 s2;
+};
+
+struct s4 {
+	struct s1 s1;
+	struct s3 s3;
+};
+
+struct list_head {
+	struct list_head *next;
+	struct list_head *prev;
+};
+
+struct hlist_node {
+	struct hlist_node *next;
+	struct hlist_node **pprev;
+};
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct callback_head {
+	struct callback_head *next;
+	void (*func)(struct callback_head *);
+};
+
+struct root_struct {
+	struct s4 s4;
+	struct list_head l;
+	struct hlist_node n;
+	struct hlist_head h;
+	struct callback_head cb;
+};
+
+/*------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct root_struct *root)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
new file mode 100644
index 000000000000..1cef3bec1dc7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper tests for struct packing determination.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct packed_trailing_space {
+	int a;
+	short b;
+} __attribute__((packed));
+
+struct non_packed_trailing_space {
+	int a;
+	short b;
+};
+
+struct packed_fields {
+	short a;
+	int b;
+} __attribute__((packed));
+
+struct non_packed_fields {
+	short a;
+	int b;
+};
+
+struct nested_packed {
+	char: 4;
+	int a: 4;
+	long int b;
+	struct {
+		char c;
+		int d;
+	} __attribute__((packed)) e;
+} __attribute__((packed));
+
+union union_is_never_packed {
+	int a: 4;
+	char b;
+	char c: 1;
+};
+
+union union_does_not_need_packing {
+	struct {
+		long int a;
+		int b;
+	} __attribute__((packed));
+	int c;
+};
+
+union jump_code_union {
+	char code[5];
+	struct {
+		char jump;
+		int offset;
+	} __attribute__((packed));
+};
+
+/*------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct {
+	struct packed_trailing_space _1;
+	struct non_packed_trailing_space _2;
+	struct packed_fields _3;
+	struct non_packed_fields _4;
+	struct nested_packed _5;
+	union union_is_never_packed _6;
+	union union_does_not_need_packing _7;
+	union jump_code_union _8;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
new file mode 100644
index 000000000000..3a62119c7498
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper tests for implicit and explicit padding between fields and
+ * at the end of a struct.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct padded_implicitly {
+	int a;
+	long int b;
+	char c;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padded_explicitly {
+ *	int a;
+ *	int: 32;
+ *	int b;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct padded_explicitly {
+	int a;
+	int: 1; /* algo will explicitly pad with full 32 bits here */
+	int b;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padded_a_lot {
+ *	int a;
+ *	long: 32;
+ *	long: 64;
+ *	long: 64;
+ *	int b;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct padded_a_lot {
+	int a;
+	/* 32 bit of implicit padding here, which algo will make explicit */
+	long: 64;
+	long: 64;
+	int b;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padded_cache_line {
+ *	int a;
+ *	long: 32;
+ *	long: 64;
+ *	long: 64;
+ *	long: 64;
+ *	int b;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct padded_cache_line {
+	int a;
+	int b __attribute__((aligned(32)));
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct zone_padding {
+ *	char x[0];
+ *};
+ *
+ *struct zone {
+ *	int a;
+ *	short b;
+ *	short: 16;
+ *	struct zone_padding __pad__;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct zone_padding {
+	char x[0];
+} __attribute__((__aligned__(8)));
+
+struct zone {
+	int a;
+	short b;
+	short: 16;
+	struct zone_padding __pad__;
+};
+
+int f(struct {
+	struct padded_implicitly _1;
+	struct padded_explicitly _2;
+	struct padded_a_lot _3;
+	struct padded_cache_line _4;
+	struct zone _5;
+} *_)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
new file mode 100644
index 000000000000..d4a02fe44a12
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C dumper test for majority of C syntax quirks.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+enum e1 {
+	A = 0,
+	B = 1,
+};
+
+enum e2 {
+	C = 100,
+	D = -100,
+	E = 0,
+};
+
+typedef enum e2 e2_t;
+
+typedef enum {
+	F = 0,
+	G = 1,
+	H = 2,
+} e3_t;
+
+typedef int int_t;
+
+typedef volatile const int * volatile const crazy_ptr_t;
+
+typedef int *****we_need_to_go_deeper_ptr_t;
+
+typedef volatile const we_need_to_go_deeper_ptr_t * restrict * volatile * const * restrict volatile * restrict const * volatile const * restrict volatile const how_about_this_ptr_t;
+
+typedef int *ptr_arr_t[10];
+
+typedef void (*fn_ptr1_t)(int);
+
+typedef void (*printf_fn_t)(const char *, ...);
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+/*
+ * While previous function pointers are pretty trivial (C-syntax-level
+ * trivial), the following are deciphered here for future generations:
+ *
+ * - `fn_ptr2_t`: function, taking anonymous struct as a first arg and pointer
+ *   to a function, that takes int and returns int, as a second arg; returning
+ *   a pointer to a const pointer to a char. Equivalent to:
+ *	typedef struct { int a; } s_t;
+ *	typedef int (*fn_t)(int);
+ *	typedef char * const * (*fn_ptr2_t)(s_t, fn_t);
+ *
+ * - `fn_complext_t`: pointer to a function returning struct and accepting
+ *   union and struct. All structs and enum are anonymous and defined inline.
+ *
+ * - `signal_t: pointer to a function accepting a pointer to a function as an
+ *   argument and returning pointer to a function as a result. Sane equivalent:
+ *	typedef void (*signal_handler_t)(int);
+ *	typedef signal_handler_t (*signal_ptr_t)(int, signal_handler_t);
+ *
+ * - fn_ptr_arr1_t: array of pointers to a function accepting pointer to
+ *   a pointer to an int and returning pointer to a char. Easy.
+ *
+ * - fn_ptr_arr2_t: array of const pointers to a function taking no arguments
+ *   and returning a const pointer to a function, that takes pointer to a
+ *   `int -> char *` function and returns pointer to a char. Equivalent:
+ *   typedef char * (*fn_input_t)(int);
+ *   typedef char * (*fn_output_outer_t)(fn_input_t);
+ *   typedef const fn_output_outer_t (* fn_output_inner_t)();
+ *   typedef const fn_output_inner_t fn_ptr_arr2_t[5];
+ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+typedef char * const * (*fn_ptr2_t)(struct {
+	int a;
+}, int (*)(int));
+
+typedef struct {
+	int a;
+	void (*b)(int, struct {
+		int c;
+	}, union {
+		char d;
+		int e[5];
+	});
+} (*fn_complex_t)(union {
+	void *f;
+	char g[16];
+}, struct {
+	int h;
+});
+
+typedef void (* (*signal_t)(int, void (*)(int)))(int);
+
+typedef char * (*fn_ptr_arr1_t[10])(int **);
+
+typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+
+struct struct_w_typedefs {
+	int_t a;
+	crazy_ptr_t b;
+	we_need_to_go_deeper_ptr_t c;
+	how_about_this_ptr_t d;
+	ptr_arr_t e;
+	fn_ptr1_t f;
+	printf_fn_t g;
+	fn_ptr2_t h;
+	fn_complex_t i;
+	signal_t j;
+	fn_ptr_arr1_t k;
+	fn_ptr_arr2_t l;
+};
+
+typedef struct {
+	int x;
+	int y;
+	int z;
+} anon_struct_t;
+
+struct struct_fwd;
+
+typedef struct struct_fwd struct_fwd_t;
+
+typedef struct struct_fwd *struct_fwd_ptr_t;
+
+union union_fwd;
+
+typedef union union_fwd union_fwd_t;
+
+typedef union union_fwd *union_fwd_ptr_t;
+
+struct struct_empty {};
+
+struct struct_simple {
+	int a;
+	char b;
+	const int_t *p;
+	struct struct_empty s;
+	enum e2 e;
+	enum {
+		ANON_VAL1 = 1,
+		ANON_VAL2 = 2,
+	} f;
+	int arr1[13];
+	enum e2 arr2[5];
+};
+
+union union_empty {};
+
+union union_simple {
+	void *ptr;
+	int num;
+	int_t num2;
+	union union_empty u;
+};
+
+struct struct_in_struct {
+	struct struct_simple simple;
+	union union_simple also_simple;
+	struct {
+		int a;
+	} not_so_hard_as_well;
+	union {
+		int b;
+		int c;
+	} anon_union_is_good;
+	struct {
+		int d;
+		int e;
+	};
+	union {
+		int f;
+		int g;
+	};
+};
+
+struct struct_with_embedded_stuff {
+	int a;
+	struct {
+		int b;
+		struct {
+			struct struct_with_embedded_stuff *c;
+			const char *d;
+		} e;
+		union {
+			volatile long int f;
+			void * restrict g;
+		};
+	};
+	union {
+		const int_t *h;
+		void (*i)(char, int, void *);
+	} j;
+	enum {
+		K = 100,
+		L = 200,
+	} m;
+	char n[16];
+	struct {
+		char o;
+		int p;
+		void (*q)(int);
+	} r[5];
+	struct struct_in_struct s[10];
+	int t[11];
+};
+
+struct root_struct {
+	enum e1 _1;
+	enum e2 _2;
+	e2_t _2_1;
+	e3_t _2_2;
+	struct struct_w_typedefs _3;
+	anon_struct_t _7;
+	struct struct_fwd *_8;
+	struct_fwd_t *_9;
+	struct_fwd_ptr_t _10;
+	union union_fwd *_11;
+	union_fwd_t *_12;
+	union_fwd_ptr_t _13;
+	struct struct_with_embedded_stuff _14;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+int f(struct root_struct *s)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c
new file mode 100644
index 000000000000..8f850823d35f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_dump.c
@@ -0,0 +1,143 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <btf.h>
+
+#define CHECK(condition, format...) ({					\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
+		fprintf(stderr, format);				\
+	}								\
+	__ret;								\
+})
+
+void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+	vfprintf(ctx, fmt, args);
+}
+
+struct btf_dump_test_case {
+	const char *name;
+	struct btf_dump_opts opts;
+} btf_dump_test_cases[] = {
+	{.name = "btf_dump_test_case_syntax", .opts = {}},
+	{.name = "btf_dump_test_case_ordering", .opts = {}},
+	{.name = "btf_dump_test_case_padding", .opts = {}},
+	{.name = "btf_dump_test_case_packing", .opts = {}},
+	{.name = "btf_dump_test_case_bitfields", .opts = {}},
+	{.name = "btf_dump_test_case_multidim", .opts = {}},
+	{.name = "btf_dump_test_case_namespacing", .opts = {}},
+};
+
+static int btf_dump_all_types(const struct btf *btf,
+			      const struct btf_dump_opts *opts)
+{
+	size_t type_cnt = btf__get_nr_types(btf);
+	struct btf_dump *d;
+	int err = 0, id;
+
+	d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+
+	for (id = 1; id <= type_cnt; id++) {
+		err = btf_dump__dump_type(d, id);
+		if (err)
+			goto done;
+	}
+
+done:
+	btf_dump__free(d);
+	return err;
+}
+
+int test_btf_dump_case(int n, struct btf_dump_test_case *test_case)
+{
+	char test_file[256], out_file[256], diff_cmd[1024];
+	struct btf *btf = NULL;
+	int err = 0, fd = -1;
+	FILE *f = NULL;
+
+	fprintf(stderr, "Test case #%d (%s): ", n, test_case->name);
+
+	snprintf(test_file, sizeof(test_file), "%s.o", test_case->name);
+
+	btf = btf__parse_elf(test_file, NULL);
+	if (CHECK(IS_ERR(btf),
+	    "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+		err = -PTR_ERR(btf);
+		btf = NULL;
+		goto done;
+	}
+
+	snprintf(out_file, sizeof(out_file),
+		 "/tmp/%s.output.XXXXXX", test_case->name);
+	fd = mkstemp(out_file);
+	if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) {
+		err = fd;
+		goto done;
+	}
+	f = fdopen(fd, "w");
+	if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n",
+		  strerror(errno), errno)) {
+		close(fd);
+		goto done;
+	}
+
+	test_case->opts.ctx = f;
+	err = btf_dump_all_types(btf, &test_case->opts);
+	fclose(f);
+	close(fd);
+	if (CHECK(err, "failure during C dumping: %d\n", err)) {
+		goto done;
+	}
+
+	snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name);
+	/*
+	 * Diff test output and expected test output, contained between
+	 * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case.
+	 * For expected output lines, everything before '*' is stripped out.
+	 * Also lines containing comment start and comment end markers are
+	 * ignored. 
+	 */
+	snprintf(diff_cmd, sizeof(diff_cmd),
+		 "awk '/START-EXPECTED-OUTPUT/{out=1;next} "
+		 "/END-EXPECTED-OUTPUT/{out=0} "
+		 "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */
+		 "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'",
+		 test_file, out_file);
+	err = system(diff_cmd);
+	if (CHECK(err,
+		  "differing test output, output=%s, err=%d, diff cmd:\n%s\n",
+		  out_file, err, diff_cmd))
+		goto done;
+
+	remove(out_file);
+	fprintf(stderr, "OK\n");
+
+done:
+	btf__free(btf);
+	return err;
+}
+
+int main() {
+	int test_case_cnt, i, err, failed = 0;
+
+	test_case_cnt = sizeof(btf_dump_test_cases) /
+			sizeof(btf_dump_test_cases[0]);
+
+	for (i = 0; i < test_case_cnt; i++) {
+		err = test_btf_dump_case(i, &btf_dump_test_cases[i]);
+		if (err)
+			failed++;
+	}
+
+	fprintf(stderr, "%d tests succeeded, %d tests failed.\n",
+		test_case_cnt - failed, failed);
+
+	return failed;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 2119f2189df1836d28909fe5a90b3d93638ecf65 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:05 -0700
Subject: bpftool: add C output format option to btf dump subcommand

Utilize new libbpf's btf_dump API to emit BTF as a C definitions.

Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index a22ef6587ebe..1b8ec91899e6 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -340,11 +340,49 @@ static int dump_btf_raw(const struct btf *btf,
 	return 0;
 }
 
+static void __printf(2, 0) btf_dump_printf(void *ctx,
+					   const char *fmt, va_list args)
+{
+	vfprintf(stdout, fmt, args);
+}
+
+static int dump_btf_c(const struct btf *btf,
+		      __u32 *root_type_ids, int root_type_cnt)
+{
+	struct btf_dump *d;
+	int err = 0, i;
+
+	d = btf_dump__new(btf, NULL, NULL, btf_dump_printf);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+
+	if (root_type_cnt) {
+		for (i = 0; i < root_type_cnt; i++) {
+			err = btf_dump__dump_type(d, root_type_ids[i]);
+			if (err)
+				goto done;
+		}
+	} else {
+		int cnt = btf__get_nr_types(btf);
+
+		for (i = 1; i <= cnt; i++) {
+			err = btf_dump__dump_type(d, i);
+			if (err)
+				goto done;
+		}
+	}
+
+done:
+	btf_dump__free(d);
+	return err;
+}
+
 static int do_dump(int argc, char **argv)
 {
 	struct btf *btf = NULL;
 	__u32 root_type_ids[2];
 	int root_type_cnt = 0;
+	bool dump_c = false;
 	__u32 btf_id = -1;
 	const char *src;
 	int fd = -1;
@@ -431,6 +469,29 @@ static int do_dump(int argc, char **argv)
 		goto done;
 	}
 
+	while (argc) {
+		if (is_prefix(*argv, "format")) {
+			NEXT_ARG();
+			if (argc < 1) {
+				p_err("expecting value for 'format' option\n");
+				goto done;
+			}
+			if (strcmp(*argv, "c") == 0) {
+				dump_c = true;
+			} else if (strcmp(*argv, "raw") == 0) {
+				dump_c = false;
+			} else {
+				p_err("unrecognized format specifier: '%s', possible values: raw, c",
+				      *argv);
+				goto done;
+			}
+			NEXT_ARG();
+		} else {
+			p_err("unrecognized option: '%s'", *argv);
+			goto done;
+		}
+	}
+
 	if (!btf) {
 		err = btf__get_from_id(btf_id, &btf);
 		if (err) {
@@ -444,7 +505,16 @@ static int do_dump(int argc, char **argv)
 		}
 	}
 
-	dump_btf_raw(btf, root_type_ids, root_type_cnt);
+	if (dump_c) {
+		if (json_output) {
+			p_err("JSON output for C-syntax dump is not supported");
+			err = -ENOTSUP;
+			goto done;
+		}
+		err = dump_btf_c(btf, root_type_ids, root_type_cnt);
+	} else {
+		err = dump_btf_raw(btf, root_type_ids, root_type_cnt);
+	}
 
 done:
 	close(fd);
@@ -460,10 +530,11 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s btf dump BTF_SRC\n"
+		"Usage: %s btf dump BTF_SRC [format FORMAT]\n"
 		"       %s btf help\n"
 		"\n"
 		"       BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
+		"       FORMAT  := { raw | c }\n"
 		"       " HELP_SPEC_MAP "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
-- 
cgit v1.2.3-59-g8ed1b


From 220ba451124e275ff548bec33b0dc01e40895b06 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:06 -0700
Subject: bpftool/docs: add description of btf dump C option

Document optional **c** option for btf dump subcommand.

Cc: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/Documentation/bpftool-btf.rst | 35 ++++++++++++++-----------
 1 file changed, 20 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 2dbc1413fabd..3daed9eba766 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -19,10 +19,11 @@ SYNOPSIS
 BTF COMMANDS
 =============
 
-|	**bpftool** **btf dump** *BTF_SRC*
+|	**bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
 |	**bpftool** **btf help**
 |
 |	*BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
+|	*FORMAT* := { **raw** | **c** }
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 
@@ -31,23 +32,27 @@ DESCRIPTION
 	**bpftool btf dump** *BTF_SRC*
 		  Dump BTF entries from a given *BTF_SRC*.
 
-                  When **id** is specified, BTF object with that ID will be
-                  loaded and all its BTF types emitted.
+		  When **id** is specified, BTF object with that ID will be
+		  loaded and all its BTF types emitted.
 
-                  When **map** is provided, it's expected that map has
-                  associated BTF object with BTF types describing key and
-                  value. It's possible to select whether to dump only BTF
-                  type(s) associated with key (**key**), value (**value**),
-                  both key and value (**kv**), or all BTF types present in
-                  associated BTF object (**all**). If not specified, **kv**
-                  is assumed.
+		  When **map** is provided, it's expected that map has
+		  associated BTF object with BTF types describing key and
+		  value. It's possible to select whether to dump only BTF
+		  type(s) associated with key (**key**), value (**value**),
+		  both key and value (**kv**), or all BTF types present in
+		  associated BTF object (**all**). If not specified, **kv**
+		  is assumed.
 
-                  When **prog** is provided, it's expected that program has
-                  associated BTF object with BTF types.
+		  When **prog** is provided, it's expected that program has
+		  associated BTF object with BTF types.
 
-                  When specifying *FILE*, an ELF file is expected, containing
-                  .BTF section with well-defined BTF binary format data,
-                  typically produced by clang or pahole.
+		  When specifying *FILE*, an ELF file is expected, containing
+		  .BTF section with well-defined BTF binary format data,
+		  typically produced by clang or pahole.
+
+		  **format** option can be used to override default (raw)
+		  output format. Raw (**raw**) or C-syntax (**c**) output
+		  formats are supported.
 
 	**bpftool btf help**
 		  Print short help message.
-- 
cgit v1.2.3-59-g8ed1b


From 90eea4086d5ed31936889a44d536bf77afa4ca8a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 24 May 2019 11:59:07 -0700
Subject: bpftool: update bash-completion w/ new c option for btf dump

Add bash completion for new C btf dump option.

Cc: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/bash-completion/bpftool | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 50e402a5a9c8..75c01eafd3a1 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -638,11 +638,24 @@ _bpftool()
                             esac
                             return 0
                             ;;
+                        format)
+                            COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
+                            ;;
                         *)
-                            if [[ $cword == 6 ]] && [[ ${words[3]} == "map" ]]; then
-                                 COMPREPLY+=( $( compgen -W 'key value kv all' -- \
-                                     "$cur" ) )
-                            fi
+                            # emit extra options
+                            case ${words[3]} in
+                                id|file)
+                                    _bpftool_once_attr 'format'
+                                    ;;
+                                map|prog)
+                                    if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then
+                                        COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) )
+                                    fi
+                                    _bpftool_once_attr 'format'
+                                    ;;
+                                *)
+                                    ;;
+                            esac
                             return 0
                             ;;
                     esac
-- 
cgit v1.2.3-59-g8ed1b


From edaccf8985305967c22903a78283c8c837ea48dd Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 May 2019 14:47:46 -0700
Subject: tools/bpf: sync bpf uapi header bpf.h to tools directory

The bpf uapi header include/uapi/linux/bpf.h is sync'ed
to tools/include/uapi/linux/bpf.h.

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 63e0cf66f01a..68d4470523a0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2672,6 +2672,20 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf-local-storage cannot be found.
+ *
+ * int bpf_send_signal(u32 sig)
+ *	Description
+ *		Send signal *sig* to the current task.
+ *	Return
+ *		0 on success or successfully queued.
+ *
+ *		**-EBUSY** if work queue under nmi is full.
+ *
+ *		**-EINVAL** if *sig* is invalid.
+ *
+ *		**-EPERM** if no permission to send the *sig*.
+ *
+ *		**-EAGAIN** if bpf program can try again.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2782,7 +2796,8 @@ union bpf_attr {
 	FN(strtol),			\
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
-	FN(sk_storage_delete),
+	FN(sk_storage_delete),		\
+	FN(send_signal),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3-59-g8ed1b


From 16f0efc3b46352018c297bbdb2c405e7d8a63095 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 23 May 2019 14:47:47 -0700
Subject: tools/bpf: add selftest in test_progs for bpf_send_signal() helper

The test covered both nmi and tracepoint perf events.
  $ ./test_progs
  ...
  test_send_signal_tracepoint:PASS:tracepoint 0 nsec
  ...
  test_send_signal_common:PASS:tracepoint 0 nsec
  ...
  test_send_signal_common:PASS:perf_event 0 nsec
  ...
  test_send_signal:OK

Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/bpf_helpers.h          |   1 +
 .../testing/selftests/bpf/prog_tests/send_signal.c | 198 +++++++++++++++++++++
 .../selftests/bpf/progs/test_send_signal_kern.c    |  51 ++++++
 3 files changed, 250 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/send_signal.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_send_signal_kern.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 4b27840b8109..e6d243b7cd74 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -224,6 +224,7 @@ static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk,
 	(void *) BPF_FUNC_sk_storage_get;
 static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) =
 	(void *)BPF_FUNC_sk_storage_delete;
+static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
new file mode 100644
index 000000000000..67cea1686305
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+static volatile int sigusr1_received = 0;
+
+static void sigusr1_handler(int signum)
+{
+	sigusr1_received++;
+}
+
+static int test_send_signal_common(struct perf_event_attr *attr,
+				    int prog_type,
+				    const char *test_name)
+{
+	int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd;
+	const char *file = "./test_send_signal_kern.o";
+	struct bpf_object *obj = NULL;
+	int pipe_c2p[2], pipe_p2c[2];
+	__u32 key = 0, duration = 0;
+	char buf[256];
+	pid_t pid;
+	__u64 val;
+
+	if (CHECK(pipe(pipe_c2p), test_name,
+		  "pipe pipe_c2p error: %s\n", strerror(errno)))
+		goto no_fork_done;
+
+	if (CHECK(pipe(pipe_p2c), test_name,
+		  "pipe pipe_p2c error: %s\n", strerror(errno))) {
+		close(pipe_c2p[0]);
+		close(pipe_c2p[1]);
+		goto no_fork_done;
+	}
+
+	pid = fork();
+	if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+		close(pipe_c2p[0]);
+		close(pipe_c2p[1]);
+		close(pipe_p2c[0]);
+		close(pipe_p2c[1]);
+		goto no_fork_done;
+	}
+
+	if (pid == 0) {
+		/* install signal handler and notify parent */
+		signal(SIGUSR1, sigusr1_handler);
+
+		close(pipe_c2p[0]); /* close read */
+		close(pipe_p2c[1]); /* close write */
+
+		/* notify parent signal handler is installed */
+		write(pipe_c2p[1], buf, 1);
+
+		/* make sure parent enabled bpf program to send_signal */
+		read(pipe_p2c[0], buf, 1);
+
+		/* wait a little for signal handler */
+		sleep(1);
+
+		if (sigusr1_received)
+			write(pipe_c2p[1], "2", 1);
+		else
+			write(pipe_c2p[1], "0", 1);
+
+		/* wait for parent notification and exit */
+		read(pipe_p2c[0], buf, 1);
+
+		close(pipe_c2p[1]);
+		close(pipe_p2c[0]);
+		exit(0);
+	}
+
+	close(pipe_c2p[1]); /* close write */
+	close(pipe_p2c[0]); /* close read */
+
+	err = bpf_prog_load(file, prog_type, &obj, &prog_fd);
+	if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n",
+		  strerror(errno)))
+		goto prog_load_failure;
+
+	pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
+			 -1 /* group id */, 0 /* flags */);
+	if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
+		  strerror(errno))) {
+		err = -1;
+		goto close_prog;
+	}
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n",
+		  strerror(errno)))
+		goto disable_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n",
+		  strerror(errno)))
+		goto disable_pmu;
+
+	err = -1;
+	info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map");
+	if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map"))
+		goto disable_pmu;
+
+	status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map");
+	if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map"))
+		goto disable_pmu;
+
+	/* wait until child signal handler installed */
+	read(pipe_c2p[0], buf, 1);
+
+	/* trigger the bpf send_signal */
+	key = 0;
+	val = (((__u64)(SIGUSR1)) << 32) | pid;
+	bpf_map_update_elem(info_map_fd, &key, &val, 0);
+
+	/* notify child that bpf program can send_signal now */
+	write(pipe_p2c[1], buf, 1);
+
+	/* wait for result */
+	err = read(pipe_c2p[0], buf, 1);
+	if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+		goto disable_pmu;
+	if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+		err = -1;
+		goto disable_pmu;
+	}
+
+	err = CHECK(buf[0] != '2', test_name, "incorrect result\n");
+
+	/* notify child safe to exit */
+	write(pipe_p2c[1], buf, 1);
+
+disable_pmu:
+	close(pmu_fd);
+close_prog:
+	bpf_object__close(obj);
+prog_load_failure:
+	close(pipe_c2p[0]);
+	close(pipe_p2c[1]);
+	wait(NULL);
+no_fork_done:
+	return err;
+}
+
+static int test_send_signal_tracepoint(void)
+{
+	const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id";
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_TRACEPOINT,
+		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN,
+		.sample_period = 1,
+		.wakeup_events = 1,
+	};
+	__u32 duration = 0;
+	int bytes, efd;
+	char buf[256];
+
+	efd = open(id_path, O_RDONLY, 0);
+	if (CHECK(efd < 0, "tracepoint",
+		  "open syscalls/sys_enter_nanosleep/id failure: %s\n",
+		  strerror(errno)))
+		return -1;
+
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint",
+		  "read syscalls/sys_enter_nanosleep/id failure: %s\n",
+		  strerror(errno)))
+		return -1;
+
+	attr.config = strtol(buf, NULL, 0);
+
+	return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint");
+}
+
+static int test_send_signal_nmi(void)
+{
+	struct perf_event_attr attr = {
+		.sample_freq = 50,
+		.freq = 1,
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+
+	return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, "perf_event");
+}
+
+void test_send_signal(void)
+{
+	int ret = 0;
+
+	ret |= test_send_signal_tracepoint();
+	ret |= test_send_signal_nmi();
+	if (!ret)
+		printf("test_send_signal:OK\n");
+	else
+		printf("test_send_signal:FAIL\n");
+}
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
new file mode 100644
index 000000000000..45a1a1a2c345
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") info_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(info_map, __u32, __u64);
+
+struct bpf_map_def SEC("maps") status_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+BPF_ANNOTATE_KV_PAIR(status_map, __u32, __u64);
+
+SEC("send_signal_demo")
+int bpf_send_signal_test(void *ctx)
+{
+	__u64 *info_val, *status_val;
+	__u32 key = 0, pid, sig;
+	int ret;
+
+	status_val = bpf_map_lookup_elem(&status_map, &key);
+	if (!status_val || *status_val != 0)
+		return 0;
+
+	info_val = bpf_map_lookup_elem(&info_map, &key);
+	if (!info_val || *info_val == 0)
+		return 0;
+
+	sig = *info_val >> 32;
+	pid = *info_val & 0xffffFFFF;
+
+	if ((bpf_get_current_pid_tgid() >> 32) == pid) {
+		ret = bpf_send_signal(sig);
+		if (ret == 0)
+			*status_val = 1;
+	}
+
+	return 0;
+}
+char __license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From 9ce33e337facc678a9578f256752e39643dedf21 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Fri, 24 May 2019 23:25:17 +0100
Subject: tools: bpf: sync uapi header bpf.h

Sync new bpf prog load flag "BPF_F_TEST_RND_HI32" to tools/.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 68d4470523a0..7c6aef253173 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -260,6 +260,24 @@ enum bpf_attach_type {
  */
 #define BPF_F_ANY_ALIGNMENT	(1U << 1)
 
+/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
+ * Verifier does sub-register def/use analysis and identifies instructions whose
+ * def only matters for low 32-bit, high 32-bit is never referenced later
+ * through implicit zero extension. Therefore verifier notifies JIT back-ends
+ * that it is safe to ignore clearing high 32-bit for these instructions. This
+ * saves some back-ends a lot of code-gen. However such optimization is not
+ * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
+ * hence hasn't used verifier's analysis result. But, we really want to have a
+ * way to be able to verify the correctness of the described optimization on
+ * x86_64 on which testsuites are frequently exercised.
+ *
+ * So, this flag is introduced. Once it is set, verifier will randomize high
+ * 32-bit for those instructions who has been identified as safe to ignore them.
+ * Then, if verifier is not doing correct analysis, such randomization will
+ * regress tests to expose bugs.
+ */
+#define BPF_F_TEST_RND_HI32	(1U << 2)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
-- 
cgit v1.2.3-59-g8ed1b


From 046561981b948e07df096a8402f9efc80bc784d9 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Fri, 24 May 2019 23:25:19 +0100
Subject: libbpf: add "prog_flags" to
 bpf_program/bpf_prog_load_attr/bpf_load_program_attr

libbpf doesn't allow passing "prog_flags" during bpf program load in a
couple of load related APIs, "bpf_load_program_xattr", "load_program" and
"bpf_prog_load_xattr".

It makes sense to allow passing "prog_flags" which is useful for
customizing program loading.

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf.c    | 1 +
 tools/lib/bpf/bpf.h    | 1 +
 tools/lib/bpf/libbpf.c | 3 +++
 tools/lib/bpf/libbpf.h | 1 +
 4 files changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c4a48086dc9a..0d4b4fe10a84 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -256,6 +256,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
 	if (load_attr->name)
 		memcpy(attr.prog_name, load_attr->name,
 		       min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.prog_flags = load_attr->prog_flags;
 
 	fd = sys_bpf_prog_load(&attr, sizeof(attr));
 	if (fd >= 0)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 9593fec75652..ff42ca043dc8 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -87,6 +87,7 @@ struct bpf_load_program_attr {
 	const void *line_info;
 	__u32 line_info_cnt;
 	__u32 log_level;
+	__u32 prog_flags;
 };
 
 /* Flags to direct loading requirements */
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 197b574406b3..ff149372b3c0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -188,6 +188,7 @@ struct bpf_program {
 	void *line_info;
 	__u32 line_info_rec_size;
 	__u32 line_info_cnt;
+	__u32 prog_flags;
 };
 
 enum libbpf_map_type {
@@ -2076,6 +2077,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.line_info_rec_size = prog->line_info_rec_size;
 	load_attr.line_info_cnt = prog->line_info_cnt;
 	load_attr.log_level = prog->log_level;
+	load_attr.prog_flags = prog->prog_flags;
 	if (!load_attr.insns || !load_attr.insns_cnt)
 		return -EINVAL;
 
@@ -3521,6 +3523,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 						      expected_attach_type);
 
 		prog->log_level = attr->log_level;
+		prog->prog_flags = attr->prog_flags;
 		if (!first_prog)
 			first_prog = prog;
 	}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index c5ff00515ce7..5abc2375defd 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -320,6 +320,7 @@ struct bpf_prog_load_attr {
 	enum bpf_attach_type expected_attach_type;
 	int ifindex;
 	int log_level;
+	int prog_flags;
 };
 
 LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-- 
cgit v1.2.3-59-g8ed1b


From f3b55abb6d5a522228e136c3bc4a9a716d5d8a54 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Fri, 24 May 2019 23:25:20 +0100
Subject: selftests: bpf: adjust several test_verifier helpers for insn
 insertion

- bpf_fill_ld_abs_vlan_push_pop:
    Prevent zext happens inside PUSH_CNT loop. This could happen because
    of BPF_LD_ABS (32-bit def) + BPF_JMP (64-bit use), or BPF_LD_ABS +
    EXIT (64-bit use of R0). So, change BPF_JMP to BPF_JMP32 and redefine
    R0 at exit path to cut off the data-flow from inside the loop.

  - bpf_fill_jump_around_ld_abs:
    Jump range is limited to 16 bit. every ld_abs is replaced by 6 insns,
    but on arches like arm, ppc etc, there will be one BPF_ZEXT inserted
    to extend the error value of the inlined ld_abs sequence which then
    contains 7 insns. so, set the dividend to 7 so the testcase could
    work on all arches.

  - bpf_fill_scale1/bpf_fill_scale2:
    Both contains ~1M BPF_ALU32_IMM which will trigger ~1M insn patcher
    call because of hi32 randomization later when BPF_F_TEST_RND_HI32 is
    set for bpf selftests. Insn patcher is not efficient that 1M call to
    it will hang computer. So , change to BPF_ALU64_IMM to avoid hi32
    randomization.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6e2fec84c929..fa9b5bfe5d9f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -138,32 +138,36 @@ static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
 loop:
 	for (j = 0; j < PUSH_CNT; j++) {
 		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		/* jump to error label */
+		insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3);
 		i++;
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
 		insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
 		insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_skb_vlan_push),
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
 		i++;
 	}
 
 	for (j = 0; j < PUSH_CNT; j++) {
 		insn[i++] = BPF_LD_ABS(BPF_B, 0);
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		insn[i] = BPF_JMP32_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 3);
 		i++;
 		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
 		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 					 BPF_FUNC_skb_vlan_pop),
-		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
 		i++;
 	}
 	if (++k < 5)
 		goto loop;
 
-	for (; i < len - 1; i++)
-		insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+	for (; i < len - 3; i++)
+		insn[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+	insn[len - 3] = BPF_JMP_A(1);
+	/* error label */
+	insn[len - 2] = BPF_MOV32_IMM(BPF_REG_0, 0);
 	insn[len - 1] = BPF_EXIT_INSN();
 	self->prog_len = len;
 }
@@ -171,8 +175,13 @@ loop:
 static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
 	struct bpf_insn *insn = self->fill_insns;
-	/* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns */
-	unsigned int len = (1 << 15) / 6;
+	/* jump range is limited to 16 bit. every ld_abs is replaced by 6 insns,
+	 * but on arches like arm, ppc etc, there will be one BPF_ZEXT inserted
+	 * to extend the error value of the inlined ld_abs sequence which then
+	 * contains 7 insns. so, set the dividend to 7 so the testcase could
+	 * work on all arches.
+	 */
+	unsigned int len = (1 << 15) / 7;
 	int i = 0;
 
 	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
@@ -232,7 +241,7 @@ static void bpf_fill_scale1(struct bpf_test *self)
 	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
 	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
-		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
 	self->retval = 42;
@@ -264,7 +273,7 @@ static void bpf_fill_scale2(struct bpf_test *self)
 	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
 	 */
 	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
-		insn[i++] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 42);
+		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
 	self->retval = 42;
-- 
cgit v1.2.3-59-g8ed1b


From 9d120b4127e8df04ee665241916056b0156b37c7 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Fri, 24 May 2019 23:25:21 +0100
Subject: selftests: bpf: enable hi32 randomization for all tests

The previous libbpf patch allows user to specify "prog_flags" to bpf
program load APIs. To enable high 32-bit randomization for a test, we need
to set BPF_F_TEST_RND_HI32 in "prog_flags".

To enable such randomization for all tests, we need to make sure all places
are passing BPF_F_TEST_RND_HI32. Changing them one by one is not
convenient, also, it would be better if a test could be switched to
"normal" running mode without code change.

Given the program load APIs used across bpf selftests are mostly:
  bpf_prog_load:      load from file
  bpf_load_program:   load from raw insns

A test_stub.c is implemented for bpf seltests, it offers two functions for
testing purpose:

  bpf_prog_test_load
  bpf_test_load_program

The are the same as "bpf_prog_load" and "bpf_load_program", except they
also set BPF_F_TEST_RND_HI32. Given *_xattr functions are the APIs to
customize any "prog_flags", it makes little sense to put these two
functions into libbpf.

Then, the following CFLAGS are passed to compilations for host programs:
  -Dbpf_prog_load=bpf_prog_test_load
  -Dbpf_load_program=bpf_test_load_program

They migrate the used load APIs to the test version, hence enable high
32-bit randomization for these tests without changing source code.

Besides all these, there are several testcases are using
"bpf_prog_load_attr" directly, their call sites are updated to pass
BPF_F_TEST_RND_HI32.

Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile               | 10 +++---
 .../selftests/bpf/prog_tests/bpf_verif_scale.c     |  1 +
 tools/testing/selftests/bpf/test_sock_addr.c       |  1 +
 tools/testing/selftests/bpf/test_sock_fields.c     |  1 +
 tools/testing/selftests/bpf/test_socket_cookie.c   |  1 +
 tools/testing/selftests/bpf/test_stub.c            | 40 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_verifier.c        |  2 +-
 7 files changed, 51 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_stub.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index cd23758e8b7a..fa002da36d0d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -15,7 +15,9 @@ LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 LLVM_READELF	?= llvm-readelf
 BTF_PAHOLE	?= pahole
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
+	  -Dbpf_prog_load=bpf_prog_test_load \
+	  -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lrt -lpthread
 
 # Order correspond to 'make run_tests' order
@@ -79,9 +81,9 @@ $(OUTPUT)/test_maps: map_tests/*.c
 
 BPFOBJ := $(OUTPUT)/libbpf.a
 
-$(TEST_GEN_PROGS): $(BPFOBJ)
+$(TEST_GEN_PROGS): test_stub.o $(BPFOBJ)
 
-$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+$(TEST_GEN_PROGS_EXTENDED): test_stub.o $(OUTPUT)/libbpf.a
 
 $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
 $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
@@ -177,7 +179,7 @@ $(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\
 						$(ALU32_BUILD_DIR)/urandom_read
 	$(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \
 		-o $(ALU32_BUILD_DIR)/test_progs_32 \
-		test_progs.c trace_helpers.c prog_tests/*.c \
+		test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \
 		$(OUTPUT)/libbpf.a $(LDLIBS)
 
 $(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index ce03289c9077..c0091137074b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -22,6 +22,7 @@ static int check_load(const char *file, enum bpf_prog_type type)
 	attr.file = file;
 	attr.prog_type = type;
 	attr.log_level = 4;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
 	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
 	bpf_object__close(obj);
 	if (err)
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 3f110eaaf29c..5d0c4f0baeff 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -745,6 +745,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
 	attr.file = path;
 	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
 	attr.expected_attach_type = test->expected_attach_type;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
 
 	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
 		if (test->expected_result != LOAD_REJECT)
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
index e089477fa0a3..f0fc103261a4 100644
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/test_sock_fields.c
@@ -414,6 +414,7 @@ int main(int argc, char **argv)
 	struct bpf_prog_load_attr attr = {
 		.file = "test_sock_fields_kern.o",
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+		.prog_flags = BPF_F_TEST_RND_HI32,
 	};
 	int cgroup_fd, egress_fd, ingress_fd, err;
 	struct bpf_program *ingress_prog;
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index e51d63786ff8..cac8ee57a013 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -148,6 +148,7 @@ static int run_test(int cgfd)
 	memset(&attr, 0, sizeof(attr));
 	attr.file = SOCKET_COOKIE_PROG;
 	attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
 
 	err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
 	if (err) {
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c
new file mode 100644
index 000000000000..84e81a89e2f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stub.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <string.h>
+
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+		       struct bpf_object **pobj, int *prog_fd)
+{
+	struct bpf_prog_load_attr attr;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = file;
+	attr.prog_type = type;
+	attr.expected_attach_type = 0;
+	attr.prog_flags = BPF_F_TEST_RND_HI32;
+
+	return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+}
+
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+			  size_t insns_cnt, const char *license,
+			  __u32 kern_version, char *log_buf,
+		     size_t log_buf_sz)
+{
+	struct bpf_load_program_attr load_attr;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = type;
+	load_attr.expected_attach_type = 0;
+	load_attr.name = NULL;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = license;
+	load_attr.kern_version = kern_version;
+	load_attr.prog_flags = BPF_F_TEST_RND_HI32;
+
+	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index fa9b5bfe5d9f..cd0248c54e25 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -882,7 +882,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (fixup_skips != skips)
 		return;
 
-	pflags = 0;
+	pflags = BPF_F_TEST_RND_HI32;
 	if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
 		pflags |= BPF_F_STRICT_ALIGNMENT;
 	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
-- 
cgit v1.2.3-59-g8ed1b


From 99513cfa16c6f54471f522d897ce48dbe4f2bb43 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Fri, 24 May 2019 16:37:07 -0700
Subject: selftest: Fixes for icmp_redirect test

I was really surprised that the IPv6 mtu exception followed by redirect
test was passing as nothing about the code suggests it should. The problem
is actually with the logic in the test script.

Fix the test cases as follows:
1. add debug function to dump the initial and redirect gateway addresses
   for ipv6. This is shown only in verbose mode. It helps verify the
   output of 'route get'.

2. fix the check_exception logic for the reset case to make sure that
   for IPv4 neither mtu nor redirect appears in the 'route get' output.
   For IPv6, make sure mtu is not present and the gateway is the initial
   R1 lladdr.

3. fix the reset logic by using a function to delete the routes added by
   initial_route_*. This format works better for the nexthop version of
   the tests.

While improving the test cases, go ahead and ensure that forwarding is
disabled since IPv6 redirect requires it.

Also, runs with kernel debugging enabled sometimes show a failure with
one of the ipv4 tests, so spread the pings over longer time interval.

The end result is that 2 tests now show failures:

TEST: IPv6: mtu exception plus redirect                    [FAIL]

and the VRF version.

This is a bug in the IPv6 logic that will need to be fixed
separately. Redirect followed by MTU works because __ip6_rt_update_pmtu
hits the 'if (!rt6_cache_allowed_for_pmtu(rt6))' path and updates the
mtu on the exception rt6_info.

MTU followed by redirect does not have this logic. rt6_do_redirect
creates a new exception and then rt6_insert_exception removes the old
one which has the MTU exception.

Fixes: ec8105352869 ("selftests: Add redirect tests")
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/icmp_redirect.sh | 46 +++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index e8d8496f5b16..76a7c4472dc3 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -80,6 +80,13 @@ log_test()
 	fi
 }
 
+log_debug()
+{
+	if [ "$VERBOSE" = "1" ]; then
+		echo "$*"
+	fi
+}
+
 run_cmd()
 {
 	local cmd="$*"
@@ -167,6 +174,7 @@ setup()
 
 		case "${ns}" in
 		h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
 		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
 		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
 			;;
@@ -250,12 +258,14 @@ setup()
 		echo "Error: Failed to get link-local address of r1's eth0"
 		exit 1
 	fi
+	log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}"
 
 	R2_LLADDR=$(get_linklocal r2 eth0)
 	if [ $? -ne 0 ]; then
 		echo "Error: Failed to get link-local address of r2's eth0"
 		exit 1
 	fi
+	log_debug "initial gateway is R2's lladdr = ${R2_LLADDR}"
 }
 
 change_h2_mtu()
@@ -289,15 +299,26 @@ check_exception()
 		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
 		grep -q "cache expires [0-9]*sec${mtu}"
 	else
+		# want to verify that neither mtu nor redirected appears in
+		# the route get output. The -v will wipe out the cache line
+		# if either are set so the last grep -q will not find a match
 		ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
-		grep -q "cache"
+		grep -E -v 'mtu|redirected' | grep -q "cache"
 	fi
 	log_test $? 0 "IPv4: ${desc}"
 
 	if [ "$with_redirect" = "yes" ]; then
-		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+	elif [ -n "${mtu}" ]; then
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -q "${mtu}"
 	else
-		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | grep -q "${mtu}"
+		# IPv6 is a bit harder. First strip out the match if it
+		# contains an mtu exception and then look for the first
+		# gateway - R1's lladdr
+		ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+		grep -v "mtu" | grep -q "${R1_LLADDR}"
 	fi
 	log_test $? 0 "IPv6: ${desc}"
 }
@@ -306,8 +327,8 @@ run_ping()
 {
 	local sz=$1
 
-	run_cmd ip netns exec h1 ping -q -M want -i 0.2 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
-	run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.2 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+	run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+	run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
 }
 
 replace_route_legacy()
@@ -317,6 +338,17 @@ replace_route_legacy()
 	run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
 }
 
+reset_route_legacy()
+{
+	run_cmd ip -netns r1    ro del ${H2_N2}
+	run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+
+	run_cmd ip -netns h1    ro del ${H1_VRF_ARG} ${H2_N2}
+	run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
+
+	initial_route_legacy
+}
+
 initial_route_legacy()
 {
 	# r1 to h2 via r2 and eth1
@@ -373,9 +405,7 @@ do_test()
 
 	# remove exceptions and restore routing
 	change_h2_mtu 1500
-	ip -netns h1 li set br0 down
-	ip -netns h1 li set br0 up
-	eval initial_route_${ttype}
+	eval reset_route_${ttype}
 
 	check_connectivity
 	if [ $? -ne 0 ]; then
-- 
cgit v1.2.3-59-g8ed1b


From 0d97dacc46d9af2daba1af224747d452bd988365 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 25 May 2019 17:01:01 -0700
Subject: bpftool: auto-complete BTF IDs for btf dump

Auto-complete BTF IDs for `btf dump id` sub-command. List of possible BTF
IDs is scavenged from loaded BPF programs that have associated BTFs, as
there is currently no API in libbpf to fetch list of all BTFs in the
system.

Suggested-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/bash-completion/bpftool | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 75c01eafd3a1..fbbad0ed8e82 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -71,6 +71,12 @@ _bpftool_get_prog_tags()
         command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
 }
 
+_bpftool_get_btf_ids()
+{
+    COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
+        command sed -n 's/.*"btf_id": \(.*\),\?$/\1/p' )" -- "$cur" ) )
+}
+
 _bpftool_get_obj_map_names()
 {
     local obj
@@ -635,6 +641,9 @@ _bpftool()
                                 map)
                                     _bpftool_get_map_ids
                                     ;;
+                                dump)
+                                    _bpftool_get_btf_ids
+                                    ;;
                             esac
                             return 0
                             ;;
-- 
cgit v1.2.3-59-g8ed1b


From 92bd6820f2b11c2decdd92e1f9020e804dbc2ed1 Mon Sep 17 00:00:00 2001
From: Chang-Hsien Tsai <luke.tw@gmail.com>
Date: Sun, 26 May 2019 10:32:11 +0000
Subject: bpf: style fix in while(!feof()) loop

Use fgets() as the while loop condition.

Signed-off-by: Chang-Hsien Tsai <luke.tw@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/xlated_dumper.c           | 4 +---
 tools/testing/selftests/bpf/trace_helpers.c | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 0bb17bf88b18..494d7ae3614d 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -31,9 +31,7 @@ void kernel_syms_load(struct dump_data *dd)
 	if (!fp)
 		return;
 
-	while (!feof(fp)) {
-		if (!fgets(buff, sizeof(buff), fp))
-			break;
+	while (fgets(buff, sizeof(buff), fp)) {
 		tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1,
 				   sizeof(*dd->sym_mapping));
 		if (!tmp) {
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 9a9fc6c9b70b..b47f205f0310 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -30,9 +30,7 @@ int load_kallsyms(void)
 	if (!f)
 		return -ENOENT;
 
-	while (!feof(f)) {
-		if (!fgets(buf, sizeof(buf), f))
-			break;
+	while (fgets(buf, sizeof(buf), f)) {
 		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
 			break;
 		if (!addr)
-- 
cgit v1.2.3-59-g8ed1b


From d98363b510ce9938a19b07fe3c02357d8c65d52a Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hariprasad.kelam@gmail.com>
Date: Sat, 25 May 2019 14:32:57 +0530
Subject: libbpf: fix warning that PTR_ERR_OR_ZERO can be used

Fix below warning reported by coccicheck:

/tools/lib/bpf/libbpf.c:3461:1-3: WARNING: PTR_ERR_OR_ZERO can be used

Signed-off-by: Hariprasad Kelam <hariprasad.kelam@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ff149372b3c0..fd5905ac2ec3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3460,9 +3460,7 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 
 long libbpf_get_error(const void *ptr)
 {
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
-	return 0;
+	return PTR_ERR_OR_ZERO(ptr);
 }
 
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
-- 
cgit v1.2.3-59-g8ed1b


From 775bc8ada89b376b4bbbce31aba47f4117fe1d9c Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 24 May 2019 11:36:46 +0100
Subject: tools: bpftool: add -d option to get debug output from libbpf

libbpf has three levels of priority for output messages: warn, info,
debug. By default, debug output is not printed to the console.

Add a new "--debug" (short name: "-d") option to bpftool to print libbpf
logs for all three levels.

Internally, we simply use the function provided by libbpf to replace the
default printing function by one that prints logs regardless of their
level.

v2:
- Remove the possibility to select the log-levels to use (v1 offered a
  combination of "warn", "info" and "debug").
- Rename option and offer a short name: -d|--debug.
- Add option description to all bpftool manual pages (instead of
  bpftool-prog.rst only), as all commands use libbpf.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-btf.rst     |  4 ++++
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst  |  4 ++++
 tools/bpf/bpftool/Documentation/bpftool-feature.rst |  4 ++++
 tools/bpf/bpftool/Documentation/bpftool-map.rst     |  4 ++++
 tools/bpf/bpftool/Documentation/bpftool-net.rst     |  4 ++++
 tools/bpf/bpftool/Documentation/bpftool-perf.rst    |  4 ++++
 tools/bpf/bpftool/Documentation/bpftool-prog.rst    |  4 ++++
 tools/bpf/bpftool/Documentation/bpftool.rst         |  3 +++
 tools/bpf/bpftool/bash-completion/bpftool           |  2 +-
 tools/bpf/bpftool/main.c                            | 14 +++++++++++++-
 10 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 3daed9eba766..6694a0fc8f99 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -72,6 +72,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 **# bpftool btf dump id 1226**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index ac26876389c2..36807735e2a5 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -113,6 +113,10 @@ OPTIONS
 	-f, --bpffs
 		  Show file names of pinned programs.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 |
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 14180e887082..4d08f35034a2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -73,6 +73,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 SEE ALSO
 ========
 	**bpf**\ (2),
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 13ef27b39f20..490b4501cb6e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -152,6 +152,10 @@ OPTIONS
 		  Do not automatically attempt to mount any virtual file system
 		  (such as tracefs or BPF virtual file system) when necessary.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 **# bpftool map show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 934580850f42..d8e5237a2085 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -65,6 +65,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index 0c7576523a21..e252bd0bc434 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -53,6 +53,10 @@ OPTIONS
 	-p, --pretty
 		  Generate human-readable JSON output. Implies **-j**.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index e8118544d118..9a92614569e6 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -174,6 +174,10 @@ OPTIONS
 		  Do not automatically attempt to mount any virtual file system
 		  (such as tracefs or BPF virtual file system) when necessary.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
+
 EXAMPLES
 ========
 **# bpftool prog show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 3e562d7fd56f..43dba0717953 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -66,6 +66,9 @@ OPTIONS
 		  Do not automatically attempt to mount any virtual file system
 		  (such as tracefs or BPF virtual file system) when necessary.
 
+	-d, --debug
+		  Print all logs available from libbpf, including debug-level
+		  information.
 
 SEE ALSO
 ========
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index fbbad0ed8e82..2725e27dfa42 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -187,7 +187,7 @@ _bpftool()
 
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
-        local c='--version --json --pretty --bpffs --mapcompat'
+        local c='--version --json --pretty --bpffs --mapcompat --debug'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1ac1fc520e6a..d74293938a05 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -10,6 +10,7 @@
 #include <string.h>
 
 #include <bpf.h>
+#include <libbpf.h>
 
 #include "main.h"
 
@@ -77,6 +78,13 @@ static int do_version(int argc, char **argv)
 	return 0;
 }
 
+static int __printf(2, 0)
+print_all_levels(__maybe_unused enum libbpf_print_level level,
+		 const char *format, va_list args)
+{
+	return vfprintf(stderr, format, args);
+}
+
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
 	       int (*help)(int argc, char **argv))
 {
@@ -317,6 +325,7 @@ int main(int argc, char **argv)
 		{ "bpffs",	no_argument,	NULL,	'f' },
 		{ "mapcompat",	no_argument,	NULL,	'm' },
 		{ "nomount",	no_argument,	NULL,	'n' },
+		{ "debug",	no_argument,	NULL,	'd' },
 		{ 0 }
 	};
 	int opt, ret;
@@ -332,7 +341,7 @@ int main(int argc, char **argv)
 	hash_init(map_table.table);
 
 	opterr = 0;
-	while ((opt = getopt_long(argc, argv, "Vhpjfmn",
+	while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
 				  options, NULL)) >= 0) {
 		switch (opt) {
 		case 'V':
@@ -362,6 +371,9 @@ int main(int argc, char **argv)
 		case 'n':
 			block_mount = true;
 			break;
+		case 'd':
+			libbpf_set_print(print_all_levels);
+			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
 			if (json_output)
-- 
cgit v1.2.3-59-g8ed1b


From 60276f9849988d3d3a54943c9ec27222c5819dae Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 24 May 2019 11:36:47 +0100
Subject: libbpf: add bpf_object__load_xattr() API function to pass log_level

libbpf was recently made aware of the log_level attribute for programs,
used to specify the level of information expected to be dumped by the
verifier. Function bpf_prog_load_xattr() got support for this log_level
parameter.

But some applications using libbpf rely on another function to load
programs, bpf_object__load(), which does accept any parameter for log
level. Create an API function based on bpf_object__load(), but accepting
an "attr" object as a parameter. Then add a log_level field to that
object, so that applications calling the new bpf_object__load_xattr()
can pick the desired log level.

v3:
- Rewrite commit log.

v2:
- We are in a new cycle, bump libbpf extraversion number.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/Makefile   |  2 +-
 tools/lib/bpf/libbpf.c   | 20 +++++++++++++++++---
 tools/lib/bpf/libbpf.h   |  6 ++++++
 tools/lib/bpf/libbpf.map |  1 +
 4 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index a2aceadf68db..9312066a1ae3 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -3,7 +3,7 @@
 
 BPF_VERSION = 0
 BPF_PATCHLEVEL = 0
-BPF_EXTRAVERSION = 3
+BPF_EXTRAVERSION = 4
 
 MAKEFLAGS += --no-print-directory
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fd5905ac2ec3..ca4432f5b067 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2224,7 +2224,7 @@ static bool bpf_program__is_function_storage(struct bpf_program *prog,
 }
 
 static int
-bpf_object__load_progs(struct bpf_object *obj)
+bpf_object__load_progs(struct bpf_object *obj, int log_level)
 {
 	size_t i;
 	int err;
@@ -2232,6 +2232,7 @@ bpf_object__load_progs(struct bpf_object *obj)
 	for (i = 0; i < obj->nr_programs; i++) {
 		if (bpf_program__is_function_storage(&obj->programs[i], obj))
 			continue;
+		obj->programs[i].log_level = log_level;
 		err = bpf_program__load(&obj->programs[i],
 					obj->license,
 					obj->kern_version);
@@ -2383,10 +2384,14 @@ int bpf_object__unload(struct bpf_object *obj)
 	return 0;
 }
 
-int bpf_object__load(struct bpf_object *obj)
+int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 {
+	struct bpf_object *obj;
 	int err;
 
+	if (!attr)
+		return -EINVAL;
+	obj = attr->obj;
 	if (!obj)
 		return -EINVAL;
 
@@ -2399,7 +2404,7 @@ int bpf_object__load(struct bpf_object *obj)
 
 	CHECK_ERR(bpf_object__create_maps(obj), err, out);
 	CHECK_ERR(bpf_object__relocate(obj), err, out);
-	CHECK_ERR(bpf_object__load_progs(obj), err, out);
+	CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
 
 	return 0;
 out:
@@ -2408,6 +2413,15 @@ out:
 	return err;
 }
 
+int bpf_object__load(struct bpf_object *obj)
+{
+	struct bpf_object_load_attr attr = {
+		.obj = obj,
+	};
+
+	return bpf_object__load_xattr(&attr);
+}
+
 static int check_path(const char *path)
 {
 	char *cp, errmsg[STRERR_BUFSIZE];
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5abc2375defd..1af0d48178c8 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -89,8 +89,14 @@ LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
 LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
 LIBBPF_API void bpf_object__close(struct bpf_object *object);
 
+struct bpf_object_load_attr {
+	struct bpf_object *obj;
+	int log_level;
+};
+
 /* Load/unload object into/from kernel */
 LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
 LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
 LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
 LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 8bf51d0a6072..46dcda89df21 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -171,4 +171,5 @@ LIBBPF_0.0.4 {
 		btf_dump__free;
 		btf_dump__new;
 		btf__parse_elf;
+		bpf_object__load_xattr;
 } LIBBPF_0.0.3;
-- 
cgit v1.2.3-59-g8ed1b


From 55d778076b0354b088a9a16d9ff584c887e17f42 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 24 May 2019 11:36:48 +0100
Subject: tools: bpftool: make -d option print debug output from verifier

The "-d" option is used to require all logs available for bpftool. So
far it meant telling libbpf to print even debug-level information. But
there is another source of info that can be made more verbose: when we
attemt to load programs with bpftool, we can pass a log_level parameter
to the verifier in order to control the amount of information that is
printed to the console.

Reuse the "-d" option to print all information the verifier can tell. At
this time, this means logs related to BPF_LOG_LEVEL1, BPF_LOG_LEVEL2 and
BPF_LOG_STATS. As mentioned in the discussion on the first version of
this set, these macros are internal to the kernel
(include/linux/bpf_verifier.h) and are not meant to be part of the
stable user API, therefore we simply use the related constants to print
whatever we can at this time, without trying to tell users what is
log_level1 or what is statistics.

Verifier logs are only used when loading programs for now (In the
future: for loading BTF objects with bpftool? Although libbpf does not
currently offer to print verifier info at debug level if no error
occurred when loading BTF objects), so bpftool.rst and bpftool-prog.rst
are the only man pages to get the update.

v3:
- Add details on log level and BTF loading at the end of commit log.

v2:
- Remove the possibility to select the log levels to use (v1 offered a
  combination of "log_level1", "log_level2" and "stats").
- The macros from kernel header bpf_verifier.h are not used (and
  therefore not moved to UAPI header).
- In v1 this was a distinct option, but is now merged in the only "-d"
  switch to activate libbpf and verifier debug-level logs all at the
  same time.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst |  5 +++--
 tools/bpf/bpftool/Documentation/bpftool.rst      |  5 +++--
 tools/bpf/bpftool/main.c                         |  2 ++
 tools/bpf/bpftool/main.h                         |  1 +
 tools/bpf/bpftool/prog.c                         | 27 +++++++++++++++---------
 5 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 9a92614569e6..228a5c863cc7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -175,8 +175,9 @@ OPTIONS
 		  (such as tracefs or BPF virtual file system) when necessary.
 
 	-d, --debug
-		  Print all logs available from libbpf, including debug-level
-		  information.
+		  Print all logs available, even debug-level information. This
+		  includes logs from libbpf as well as from the verifier, when
+		  attempting to load programs.
 
 EXAMPLES
 ========
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 43dba0717953..6a9c52ef84a9 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -67,8 +67,9 @@ OPTIONS
 		  (such as tracefs or BPF virtual file system) when necessary.
 
 	-d, --debug
-		  Print all logs available from libbpf, including debug-level
-		  information.
+		  Print all logs available, even debug-level information. This
+		  includes logs from libbpf as well as from the verifier, when
+		  attempting to load programs.
 
 SEE ALSO
 ========
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index d74293938a05..4879f6395c7e 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -26,6 +26,7 @@ bool pretty_output;
 bool json_output;
 bool show_pinned;
 bool block_mount;
+bool verifier_logs;
 int bpf_flags;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
@@ -373,6 +374,7 @@ int main(int argc, char **argv)
 			break;
 		case 'd':
 			libbpf_set_print(print_all_levels);
+			verifier_logs = true;
 			break;
 		default:
 			p_err("unrecognized option '%s'", argv[optind - 1]);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 3d63feb7f852..28a2a5857e14 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -91,6 +91,7 @@ extern json_writer_t *json_wtr;
 extern bool json_output;
 extern bool show_pinned;
 extern bool block_mount;
+extern bool verifier_logs;
 extern int bpf_flags;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 26336bad0442..1f209c80d906 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -750,10 +750,11 @@ static int do_detach(int argc, char **argv)
 
 static int load_with_options(int argc, char **argv, bool first_prog_only)
 {
-	enum bpf_attach_type expected_attach_type;
-	struct bpf_object_open_attr attr = {
-		.prog_type	= BPF_PROG_TYPE_UNSPEC,
+	struct bpf_object_load_attr load_attr = { 0 };
+	struct bpf_object_open_attr open_attr = {
+		.prog_type = BPF_PROG_TYPE_UNSPEC,
 	};
+	enum bpf_attach_type expected_attach_type;
 	struct map_replace *map_replace = NULL;
 	struct bpf_program *prog = NULL, *pos;
 	unsigned int old_map_fds = 0;
@@ -767,7 +768,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
 	if (!REQ_ARGS(2))
 		return -1;
-	attr.file = GET_ARG();
+	open_attr.file = GET_ARG();
 	pinfile = GET_ARG();
 
 	while (argc) {
@@ -776,7 +777,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
 			NEXT_ARG();
 
-			if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
+			if (open_attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
 				p_err("program type already specified");
 				goto err_free_reuse_maps;
 			}
@@ -793,7 +794,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 			strcat(type, *argv);
 			strcat(type, "/");
 
-			err = libbpf_prog_type_by_name(type, &attr.prog_type,
+			err = libbpf_prog_type_by_name(type,
+						       &open_attr.prog_type,
 						       &expected_attach_type);
 			free(type);
 			if (err < 0)
@@ -881,16 +883,16 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 
 	set_max_rlimit();
 
-	obj = __bpf_object__open_xattr(&attr, bpf_flags);
+	obj = __bpf_object__open_xattr(&open_attr, bpf_flags);
 	if (IS_ERR_OR_NULL(obj)) {
 		p_err("failed to open object file");
 		goto err_free_reuse_maps;
 	}
 
 	bpf_object__for_each_program(pos, obj) {
-		enum bpf_prog_type prog_type = attr.prog_type;
+		enum bpf_prog_type prog_type = open_attr.prog_type;
 
-		if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
+		if (open_attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
 			const char *sec_name = bpf_program__title(pos, false);
 
 			err = libbpf_prog_type_by_name(sec_name, &prog_type,
@@ -960,7 +962,12 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 		goto err_close_obj;
 	}
 
-	err = bpf_object__load(obj);
+	load_attr.obj = obj;
+	if (verifier_logs)
+		/* log_level1 + log_level2 + stats, but not stable UAPI */
+		load_attr.log_level = 1 + 2 + 4;
+
+	err = bpf_object__load_xattr(&load_attr);
 	if (err) {
 		p_err("failed to load object file");
 		goto err_close_obj;
-- 
cgit v1.2.3-59-g8ed1b


From 486d3f22c0cab818ba6b4324fd8eda643d5f16b2 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 24 May 2019 15:28:56 -0700
Subject: selftests/bpf: fail test_tunnel.sh if subtests fail

Right now test_tunnel.sh always exits with success even if some
of the subtests fail. Since the output is very verbose, it's
hard to spot the issues with subtests. Let's fail the script
if any subtest fails.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_tunnel.sh | 32 ++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 546aee3e9fb4..bd12ec97a44d 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -696,30 +696,57 @@ check_err()
 
 bpf_tunnel_test()
 {
+	local errors=0
+
 	echo "Testing GRE tunnel..."
 	test_gre
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6GRE tunnel..."
 	test_ip6gre
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6GRETAP tunnel..."
 	test_ip6gretap
+	errors=$(( $errors + $? ))
+
 	echo "Testing ERSPAN tunnel..."
 	test_erspan v2
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6ERSPAN tunnel..."
 	test_ip6erspan v2
+	errors=$(( $errors + $? ))
+
 	echo "Testing VXLAN tunnel..."
 	test_vxlan
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6VXLAN tunnel..."
 	test_ip6vxlan
+	errors=$(( $errors + $? ))
+
 	echo "Testing GENEVE tunnel..."
 	test_geneve
+	errors=$(( $errors + $? ))
+
 	echo "Testing IP6GENEVE tunnel..."
 	test_ip6geneve
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPIP tunnel..."
 	test_ipip
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPIP6 tunnel..."
 	test_ipip6
+	errors=$(( $errors + $? ))
+
 	echo "Testing IPSec tunnel..."
 	test_xfrm_tunnel
+	errors=$(( $errors + $? ))
+
+	return $errors
 }
 
 trap cleanup 0 3 6
@@ -728,4 +755,9 @@ trap cleanup_exit 2 9
 cleanup
 bpf_tunnel_test
 
+if [ $? -ne 0 ]; then
+	echo -e "$(basename $0): ${RED}FAIL${NC}"
+	exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
 exit 0
-- 
cgit v1.2.3-59-g8ed1b


From ba0c0cc05dda2c56e23c88e4610ce2ac65ff86ec Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Sat, 25 May 2019 09:37:40 -0700
Subject: selftests/bpf: convert test_cgrp2_attach2 example into kselftest

Convert test_cgrp2_attach2 example into a proper test_cgroup_attach
kselftest. It's better because we do run kselftest on a constant
basis, so there are better chances to spot a potential regression.

Also make it slightly less verbose to conform kselftests output style.

Output example:
  $ ./test_cgroup_attach
  #override:PASS
  #multi:PASS
  test_cgroup_attach:PASS

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 samples/bpf/Makefile                             |   2 -
 samples/bpf/test_cgrp2_attach2.c                 | 459 ----------------------
 tools/testing/selftests/bpf/.gitignore           |   1 +
 tools/testing/selftests/bpf/Makefile             |   3 +-
 tools/testing/selftests/bpf/test_cgroup_attach.c | 475 +++++++++++++++++++++++
 5 files changed, 478 insertions(+), 462 deletions(-)
 delete mode 100644 samples/bpf/test_cgrp2_attach2.c
 create mode 100644 tools/testing/selftests/bpf/test_cgroup_attach.c

(limited to 'tools')

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4f0a1cdbfe7c..253e5a2856be 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -26,7 +26,6 @@ hostprogs-y += map_perf_test
 hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_attach2
 hostprogs-y += test_cgrp2_sock
 hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
@@ -81,7 +80,6 @@ map_perf_test-objs := bpf_load.o map_perf_test_user.o
 test_overhead-objs := bpf_load.o test_overhead_user.o
 test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
 test_cgrp2_attach-objs := test_cgrp2_attach.o
-test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
 test_cgrp2_sock-objs := test_cgrp2_sock.o
 test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
 xdp1-objs := xdp1_user.o
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
deleted file mode 100644
index 0bb6507256b7..000000000000
--- a/samples/bpf/test_cgrp2_attach2.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- *   The eBPF program accesses the map passed in to store two pieces of
- *   information. The number of invocations of the program, which maps
- *   to the number of packets received, is stored to key 0. Key 1 is
- *   incremented on each iteration by the number of bytes stored in
- *   the skb. The program also stores the number of received bytes
- *   in the cgroup storage.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- *   packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-#include "bpf_insn.h"
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-#define FOO		"/foo"
-#define BAR		"/foo/bar/"
-#define PING_CMD	"ping -c1 -w1 127.0.0.1 > /dev/null"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-static int prog_load(int verdict)
-{
-	int ret;
-	struct bpf_insn prog[] = {
-		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
-	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
-			       prog, insns_cnt, "GPL", 0,
-			       bpf_log_buf, BPF_LOG_BUF_SIZE);
-
-	if (ret < 0) {
-		log_err("Loading program");
-		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
-		return 0;
-	}
-	return ret;
-}
-
-static int test_foo_bar(void)
-{
-	int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
-
-	allow_prog = prog_load(1);
-	if (!allow_prog)
-		goto err;
-
-	drop_prog = prog_load(0);
-	if (!drop_prog)
-		goto err;
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	/* Create cgroup /foo, get fd, and join it */
-	foo = create_and_get_cgroup(FOO);
-	if (foo < 0)
-		goto err;
-
-	if (join_cgroup(FOO))
-		goto err;
-
-	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo");
-		goto err;
-	}
-
-	printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	/* Create cgroup /foo/bar, get fd, and join it */
-	bar = create_and_get_cgroup(BAR);
-	if (bar < 0)
-		goto err;
-
-	if (join_cgroup(BAR))
-		goto err;
-
-	printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
-	assert(system(PING_CMD) == 0);
-
-	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo/bar");
-		goto err;
-	}
-
-	printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
-	       "This ping in cgroup /foo/bar should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo");
-		goto err;
-	}
-
-	printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
-	       "This ping in cgroup /foo/bar should pass...\n");
-	assert(system(PING_CMD) == 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
-		errno = 0;
-		log_err("Unexpected success attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
-		errno = 0;
-		log_err("Unexpected success in double detach from /foo");
-		goto err;
-	}
-
-	if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching non-overridable prog to /foo");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
-		errno = 0;
-		log_err("Unexpected success attaching non-overridable prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_OVERRIDE)) {
-		errno = 0;
-		log_err("Unexpected success attaching overridable prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_OVERRIDE)) {
-		errno = 0;
-		log_err("Unexpected success attaching overridable prog to /foo");
-		goto err;
-	}
-
-	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching different non-overridable prog to /foo");
-		goto err;
-	}
-
-	goto out;
-
-err:
-	rc = 1;
-
-out:
-	close(foo);
-	close(bar);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("### override:PASS\n");
-	else
-		printf("### override:FAIL\n");
-	return rc;
-}
-
-static int map_fd = -1;
-
-static int prog_load_cnt(int verdict, int val)
-{
-	int cgroup_storage_fd, percpu_cgroup_storage_fd;
-
-	if (map_fd < 0)
-		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
-	if (map_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
-				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
-	if (cgroup_storage_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	percpu_cgroup_storage_fd = bpf_create_map(
-		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
-		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
-	if (percpu_cgroup_storage_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	struct bpf_insn prog[] = {
-		BPF_MOV32_IMM(BPF_REG_0, 0),
-		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
-		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
-		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
-		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-
-		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
-		BPF_MOV64_IMM(BPF_REG_1, val),
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
-
-		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
-		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
-		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
-
-		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-	int ret;
-
-	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
-			       prog, insns_cnt, "GPL", 0,
-			       bpf_log_buf, BPF_LOG_BUF_SIZE);
-
-	if (ret < 0) {
-		log_err("Loading program");
-		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
-		return 0;
-	}
-	close(cgroup_storage_fd);
-	return ret;
-}
-
-
-static int test_multiprog(void)
-{
-	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
-	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
-	int drop_prog, allow_prog[6] = {}, rc = 0;
-	unsigned long long value;
-	int i = 0;
-
-	for (i = 0; i < 6; i++) {
-		allow_prog[i] = prog_load_cnt(1, 1 << i);
-		if (!allow_prog[i])
-			goto err;
-	}
-	drop_prog = prog_load_cnt(0, 1);
-	if (!drop_prog)
-		goto err;
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	cg1 = create_and_get_cgroup("/cg1");
-	if (cg1 < 0)
-		goto err;
-	cg2 = create_and_get_cgroup("/cg1/cg2");
-	if (cg2 < 0)
-		goto err;
-	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
-	if (cg3 < 0)
-		goto err;
-	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
-	if (cg4 < 0)
-		goto err;
-	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
-	if (cg5 < 0)
-		goto err;
-
-	if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
-		goto err;
-
-	if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg1");
-		goto err;
-	}
-	if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_MULTI)) {
-		log_err("Unexpected success attaching the same prog to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog2 to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg2");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg3");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg4");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching prog to cg5");
-		goto err;
-	}
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 32);
-
-	/* query the number of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      NULL, NULL, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	/* retrieve prog_ids of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	assert(attach_flags == 0);
-	saved_prog_id = prog_ids[0];
-	/* check enospc handling */
-	prog_ids[0] = 0;
-	prog_cnt = 2;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == -1 &&
-	       errno == ENOSPC);
-	assert(prog_cnt == 4);
-	/* check that prog_ids are returned even when buffer is too small */
-	assert(prog_ids[0] == saved_prog_id);
-	/* retrieve prog_id of single attached prog in cg5 */
-	prog_ids[0] = 0;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 1);
-	assert(prog_ids[0] == saved_prog_id);
-
-	/* detach bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg5");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 16);
-
-	/* detach 3rd from bottom program and ping again */
-	errno = 0;
-	if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Unexpected success on detach from cg3");
-		goto err;
-	}
-	if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching from cg3");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 16);
-
-	/* detach 2nd from bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg4");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 4);
-
-	prog_cnt = 4;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 3);
-	assert(attach_flags == 0);
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 0);
-	goto out;
-err:
-	rc = 1;
-
-out:
-	for (i = 0; i < 6; i++)
-		if (allow_prog[i] > 0)
-			close(allow_prog[i]);
-	close(cg1);
-	close(cg2);
-	close(cg3);
-	close(cg4);
-	close(cg5);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("### multi:PASS\n");
-	else
-		printf("### multi:FAIL\n");
-	return rc;
-}
-
-int main(int argc, char **argv)
-{
-	int rc = 0;
-
-	rc = test_foo_bar();
-	if (rc)
-		return rc;
-
-	return test_multiprog();
-}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index b3da2ffdc158..b2a9902f11c5 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -22,6 +22,7 @@ test_lirc_mode2_user
 get_cgroup_id_user
 test_skb_cgroup_id_user
 test_socket_cookie
+test_cgroup_attach
 test_cgroup_storage
 test_select_reuseport
 test_flow_dissector
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fa002da36d0d..9b21391c4966 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-	test_btf_dump
+	test_btf_dump test_cgroup_attach
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -99,6 +99,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
 $(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
new file mode 100644
index 000000000000..2d6d57f50e10
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ *   The eBPF program accesses the map passed in to store two pieces of
+ *   information. The number of invocations of the program, which maps
+ *   to the number of packets received, is stored to key 0. Key 1 is
+ *   incremented on each iteration by the number of bytes stored in
+ *   the skb. The program also stores the number of received bytes
+ *   in the cgroup storage.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ *   packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define FOO		"/foo"
+#define BAR		"/foo/bar/"
+#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#ifdef DEBUG
+#define debug(args...) printf(args)
+#else
+#define debug(args...)
+#endif
+
+static int prog_load(int verdict)
+{
+	int ret;
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+		BPF_EXIT_INSN(),
+	};
+	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+			       prog, insns_cnt, "GPL", 0,
+			       bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+	if (ret < 0) {
+		log_err("Loading program");
+		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
+		return 0;
+	}
+	return ret;
+}
+
+static int test_foo_bar(void)
+{
+	int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
+
+	allow_prog = prog_load(1);
+	if (!allow_prog)
+		goto err;
+
+	drop_prog = prog_load(0);
+	if (!drop_prog)
+		goto err;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	/* Create cgroup /foo, get fd, and join it */
+	foo = create_and_get_cgroup(FOO);
+	if (foo < 0)
+		goto err;
+
+	if (join_cgroup(FOO))
+		goto err;
+
+	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo");
+		goto err;
+	}
+
+	debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
+	assert(system(PING_CMD) != 0);
+
+	/* Create cgroup /foo/bar, get fd, and join it */
+	bar = create_and_get_cgroup(BAR);
+	if (bar < 0)
+		goto err;
+
+	if (join_cgroup(BAR))
+		goto err;
+
+	debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
+	assert(system(PING_CMD) != 0);
+
+	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo/bar");
+		goto err;
+	}
+
+	debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
+	assert(system(PING_CMD) == 0);
+
+	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching program from /foo/bar");
+		goto err;
+	}
+
+	debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
+	       "This ping in cgroup /foo/bar should fail...\n");
+	assert(system(PING_CMD) != 0);
+
+	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo/bar");
+		goto err;
+	}
+
+	if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching program from /foo");
+		goto err;
+	}
+
+	debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
+	       "This ping in cgroup /foo/bar should pass...\n");
+	assert(system(PING_CMD) == 0);
+
+	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
+		errno = 0;
+		log_err("Unexpected success attaching prog to /foo/bar");
+		goto err;
+	}
+
+	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching program from /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
+		errno = 0;
+		log_err("Unexpected success in double detach from /foo");
+		goto err;
+	}
+
+	if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
+		log_err("Attaching non-overridable prog to /foo");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
+		errno = 0;
+		log_err("Unexpected success attaching non-overridable prog to /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_ALLOW_OVERRIDE)) {
+		errno = 0;
+		log_err("Unexpected success attaching overridable prog to /foo/bar");
+		goto err;
+	}
+
+	if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_ALLOW_OVERRIDE)) {
+		errno = 0;
+		log_err("Unexpected success attaching overridable prog to /foo");
+		goto err;
+	}
+
+	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
+		log_err("Attaching different non-overridable prog to /foo");
+		goto err;
+	}
+
+	goto out;
+
+err:
+	rc = 1;
+
+out:
+	close(foo);
+	close(bar);
+	cleanup_cgroup_environment();
+	if (!rc)
+		printf("#override:PASS\n");
+	else
+		printf("#override:FAIL\n");
+	return rc;
+}
+
+static int map_fd = -1;
+
+static int prog_load_cnt(int verdict, int val)
+{
+	int cgroup_storage_fd, percpu_cgroup_storage_fd;
+
+	if (map_fd < 0)
+		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+	if (map_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		return -1;
+	}
+
+	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+	if (cgroup_storage_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		return -1;
+	}
+
+	percpu_cgroup_storage_fd = bpf_create_map(
+		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+	if (percpu_cgroup_storage_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		return -1;
+	}
+
+	struct bpf_insn prog[] = {
+		BPF_MOV32_IMM(BPF_REG_0, 0),
+		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
+		BPF_MOV64_IMM(BPF_REG_2, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+		BPF_MOV64_IMM(BPF_REG_1, val),
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
+
+		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
+		BPF_MOV64_IMM(BPF_REG_2, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
+		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+
+		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+		BPF_EXIT_INSN(),
+	};
+	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	int ret;
+
+	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+			       prog, insns_cnt, "GPL", 0,
+			       bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+	if (ret < 0) {
+		log_err("Loading program");
+		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
+		return 0;
+	}
+	close(cgroup_storage_fd);
+	return ret;
+}
+
+
+static int test_multiprog(void)
+{
+	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
+	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
+	int drop_prog, allow_prog[6] = {}, rc = 0;
+	unsigned long long value;
+	int i = 0;
+
+	for (i = 0; i < 6; i++) {
+		allow_prog[i] = prog_load_cnt(1, 1 << i);
+		if (!allow_prog[i])
+			goto err;
+	}
+	drop_prog = prog_load_cnt(0, 1);
+	if (!drop_prog)
+		goto err;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cg1 = create_and_get_cgroup("/cg1");
+	if (cg1 < 0)
+		goto err;
+	cg2 = create_and_get_cgroup("/cg1/cg2");
+	if (cg2 < 0)
+		goto err;
+	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
+	if (cg3 < 0)
+		goto err;
+	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
+	if (cg4 < 0)
+		goto err;
+	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
+	if (cg5 < 0)
+		goto err;
+
+	if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
+		goto err;
+
+	if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_MULTI)) {
+		log_err("Attaching prog to cg1");
+		goto err;
+	}
+	if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+			     BPF_F_ALLOW_MULTI)) {
+		log_err("Unexpected success attaching the same prog to cg1");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_MULTI)) {
+		log_err("Attaching prog2 to cg1");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to cg2");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_MULTI)) {
+		log_err("Attaching prog to cg3");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
+			    BPF_F_ALLOW_OVERRIDE)) {
+		log_err("Attaching prog to cg4");
+		goto err;
+	}
+	if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
+		log_err("Attaching prog to cg5");
+		goto err;
+	}
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 8 + 32);
+
+	/* query the number of effective progs in cg5 */
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      NULL, NULL, &prog_cnt) == 0);
+	assert(prog_cnt == 4);
+	/* retrieve prog_ids of effective progs in cg5 */
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      &attach_flags, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 4);
+	assert(attach_flags == 0);
+	saved_prog_id = prog_ids[0];
+	/* check enospc handling */
+	prog_ids[0] = 0;
+	prog_cnt = 2;
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      &attach_flags, prog_ids, &prog_cnt) == -1 &&
+	       errno == ENOSPC);
+	assert(prog_cnt == 4);
+	/* check that prog_ids are returned even when buffer is too small */
+	assert(prog_ids[0] == saved_prog_id);
+	/* retrieve prog_id of single attached prog in cg5 */
+	prog_ids[0] = 0;
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
+			      NULL, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 1);
+	assert(prog_ids[0] == saved_prog_id);
+
+	/* detach bottom program and ping again */
+	if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching prog from cg5");
+		goto err;
+	}
+	value = 0;
+	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 8 + 16);
+
+	/* detach 3rd from bottom program and ping again */
+	errno = 0;
+	if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Unexpected success on detach from cg3");
+		goto err;
+	}
+	if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching from cg3");
+		goto err;
+	}
+	value = 0;
+	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 16);
+
+	/* detach 2nd from bottom program and ping again */
+	if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
+		log_err("Detaching prog from cg4");
+		goto err;
+	}
+	value = 0;
+	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+	assert(system(PING_CMD) == 0);
+	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+	assert(value == 1 + 2 + 4);
+
+	prog_cnt = 4;
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+			      &attach_flags, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 3);
+	assert(attach_flags == 0);
+	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
+			      NULL, prog_ids, &prog_cnt) == 0);
+	assert(prog_cnt == 0);
+	goto out;
+err:
+	rc = 1;
+
+out:
+	for (i = 0; i < 6; i++)
+		if (allow_prog[i] > 0)
+			close(allow_prog[i]);
+	close(cg1);
+	close(cg2);
+	close(cg3);
+	close(cg4);
+	close(cg5);
+	cleanup_cgroup_environment();
+	if (!rc)
+		printf("#multi:PASS\n");
+	else
+		printf("#multi:FAIL\n");
+	return rc;
+}
+
+int main(void)
+{
+	int (*tests[])(void) = {test_foo_bar, test_multiprog};
+	int errors = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++)
+		if (tests[i]())
+			errors++;
+
+	if (errors)
+		printf("test_cgroup_attach:FAIL\n");
+	else
+		printf("test_cgroup_attach:PASS\n");
+
+	return errors ? EXIT_FAILURE : EXIT_SUCCESS;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 596092ef8beaa73891da4f406446b283cc566cdb Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Sat, 25 May 2019 09:37:41 -0700
Subject: selftests/bpf: enable all available cgroup v2 controllers

Enable all available cgroup v2 controllers when setting up
the environment for the bpf kselftests. It's required to properly test
the bpf prog auto-detach feature. Also it will generally increase
the code coverage.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/cgroup_helpers.c | 57 ++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 6692a40a6979..0d89f0396be4 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -33,6 +33,60 @@
 	snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
 		 CGROUP_WORK_DIR, path)
 
+/**
+ * enable_all_controllers() - Enable all available cgroup v2 controllers
+ *
+ * Enable all available cgroup v2 controllers in order to increase
+ * the code coverage.
+ *
+ * If successful, 0 is returned.
+ */
+int enable_all_controllers(char *cgroup_path)
+{
+	char path[PATH_MAX + 1];
+	char buf[PATH_MAX];
+	char *c, *c2;
+	int fd, cfd;
+	size_t len;
+
+	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		log_err("Opening cgroup.controllers: %s", path);
+		return 1;
+	}
+
+	len = read(fd, buf, sizeof(buf) - 1);
+	if (len < 0) {
+		close(fd);
+		log_err("Reading cgroup.controllers: %s", path);
+		return 1;
+	}
+	buf[len] = 0;
+	close(fd);
+
+	/* No controllers available? We're probably on cgroup v1. */
+	if (len == 0)
+		return 0;
+
+	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
+	cfd = open(path, O_RDWR);
+	if (cfd < 0) {
+		log_err("Opening cgroup.subtree_control: %s", path);
+		return 1;
+	}
+
+	for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
+		if (dprintf(cfd, "+%s\n", c) <= 0) {
+			log_err("Enabling controller %s: %s", c, path);
+			close(cfd);
+			return 1;
+		}
+	}
+	close(cfd);
+	return 0;
+}
+
 /**
  * setup_cgroup_environment() - Setup the cgroup environment
  *
@@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
 		return 1;
 	}
 
+	if (enable_all_controllers(cgroup_workdir))
+		return 1;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d5506591d54b915aaa9d489c9289af2cc88f47a4 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Sat, 25 May 2019 09:37:42 -0700
Subject: selftests/bpf: add auto-detach test

Add a kselftest to cover bpf auto-detachment functionality.
The test creates a cgroup, associates some resources with it,
attaches a couple of bpf programs and deletes the cgroup.

Then it checks that bpf programs are going away in 5 seconds.

Expected output:
  $ ./test_cgroup_attach
  #override:PASS
  #multi:PASS
  #autodetach:PASS
  test_cgroup_attach:PASS

On a kernel without auto-detaching:
  $ ./test_cgroup_attach
  #override:PASS
  #multi:PASS
  #autodetach:FAIL
  test_cgroup_attach:FAIL

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_cgroup_attach.c | 98 +++++++++++++++++++++++-
 1 file changed, 97 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
index 2d6d57f50e10..7671909ee1cb 100644
--- a/tools/testing/selftests/bpf/test_cgroup_attach.c
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -456,9 +456,105 @@ out:
 	return rc;
 }
 
+static int test_autodetach(void)
+{
+	__u32 prog_cnt = 4, attach_flags;
+	int allow_prog[2] = {0};
+	__u32 prog_ids[2] = {0};
+	int cg = 0, i, rc = -1;
+	void *ptr = NULL;
+	int attempts;
+
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+		allow_prog[i] = prog_load_cnt(1, 1 << i);
+		if (!allow_prog[i])
+			goto err;
+	}
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	/* create a cgroup, attach two programs and remember their ids */
+	cg = create_and_get_cgroup("/cg_autodetach");
+	if (cg < 0)
+		goto err;
+
+	if (join_cgroup("/cg_autodetach"))
+		goto err;
+
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+		if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
+				    BPF_F_ALLOW_MULTI)) {
+			log_err("Attaching prog[%d] to cg:egress", i);
+			goto err;
+		}
+	}
+
+	/* make sure that programs are attached and run some traffic */
+	assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
+			      prog_ids, &prog_cnt) == 0);
+	assert(system(PING_CMD) == 0);
+
+	/* allocate some memory (4Mb) to pin the original cgroup */
+	ptr = malloc(4 * (1 << 20));
+	if (!ptr)
+		goto err;
+
+	/* close programs and cgroup fd */
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+		close(allow_prog[i]);
+		allow_prog[i] = 0;
+	}
+
+	close(cg);
+	cg = 0;
+
+	/* leave the cgroup and remove it. don't detach programs */
+	cleanup_cgroup_environment();
+
+	/* wait for the asynchronous auto-detachment.
+	 * wait for no more than 5 sec and give up.
+	 */
+	for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
+		for (attempts = 5; attempts >= 0; attempts--) {
+			int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
+
+			if (fd < 0)
+				break;
+
+			/* don't leave the fd open */
+			close(fd);
+
+			if (!attempts)
+				goto err;
+
+			sleep(1);
+		}
+	}
+
+	rc = 0;
+err:
+	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+		if (allow_prog[i] > 0)
+			close(allow_prog[i]);
+	if (cg)
+		close(cg);
+	free(ptr);
+	cleanup_cgroup_environment();
+	if (!rc)
+		printf("#autodetach:PASS\n");
+	else
+		printf("#autodetach:FAIL\n");
+	return rc;
+}
+
 int main(void)
 {
-	int (*tests[])(void) = {test_foo_bar, test_multiprog};
+	int (*tests[])(void) = {
+		test_foo_bar,
+		test_multiprog,
+		test_autodetach,
+	};
 	int errors = 0;
 	int i;
 
-- 
cgit v1.2.3-59-g8ed1b


From 3fb321fde22ddc3adc83e1f690b1fad9dfbd24ef Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 27 May 2019 16:47:51 -0400
Subject: selftests/net: ipv6 flowlabel

Test the IPv6 flowlabel control and datapath interfaces:

Acquire and release the right to use flowlabels with socket option
IPV6_FLOWLABEL_MGR.

Then configure flowlabels on send and read them on recv with cmsg
IPV6_FLOWINFO. Also verify auto-flowlabel if not explicitly set.

This helped identify the issue fixed in commit 95c169251bf73 ("ipv6:
invert flowlabel sharing check in process and user mode")

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/.gitignore           |   2 +
 tools/testing/selftests/net/Makefile             |   4 +-
 tools/testing/selftests/net/ipv6_flowlabel.c     | 229 +++++++++++++++++++++++
 tools/testing/selftests/net/ipv6_flowlabel.sh    |  21 +++
 tools/testing/selftests/net/ipv6_flowlabel_mgr.c | 199 ++++++++++++++++++++
 5 files changed, 453 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/net/ipv6_flowlabel.c
 create mode 100755 tools/testing/selftests/net/ipv6_flowlabel.sh
 create mode 100644 tools/testing/selftests/net/ipv6_flowlabel_mgr.c

(limited to 'tools')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 27ef4d07ac91..99a4e41d5249 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -18,3 +18,5 @@ tls
 txring_overwrite
 ip_defrag
 so_txtime
+flowlabel
+flowlabel_mgr
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8af7869e0f1c..8343fb9d8a46 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -9,13 +9,13 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh
+TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c
new file mode 100644
index 000000000000..a7c41375374f
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO cmsg on send and recv */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWINFO
+#define IPV6_FLOWINFO 11
+#endif
+
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR 32
+#endif
+
+#define FLOWLABEL_WILDCARD	((uint32_t) -1)
+
+static const char cfg_data[]	= "a";
+static uint32_t cfg_label	= 1;
+
+static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel)
+{
+	char control[CMSG_SPACE(sizeof(flowlabel))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	int ret;
+
+	iov.iov_base = (char *)cfg_data;
+	iov.iov_len = sizeof(cfg_data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	if (with_flowlabel) {
+		struct cmsghdr *cm;
+
+		cm = (void *)control;
+		cm->cmsg_len = CMSG_LEN(sizeof(flowlabel));
+		cm->cmsg_level = SOL_IPV6;
+		cm->cmsg_type = IPV6_FLOWINFO;
+		*(uint32_t *)CMSG_DATA(cm) = htonl(flowlabel);
+
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+	}
+
+	ret = sendmsg(fd, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "send");
+
+	if (with_flowlabel)
+		fprintf(stderr, "sent with label %u\n", flowlabel);
+	else
+		fprintf(stderr, "sent without label\n");
+}
+
+static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
+{
+	char control[CMSG_SPACE(sizeof(expect))];
+	char data[sizeof(cfg_data)];
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+	uint32_t flowlabel;
+	int ret;
+
+	iov.iov_base = data;
+	iov.iov_len = sizeof(data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	memset(control, 0, sizeof(control));
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, 0);
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+		error(1, 0, "recv: truncated");
+	if (ret != sizeof(cfg_data))
+		error(1, 0, "recv: length mismatch");
+	if (memcmp(data, cfg_data, sizeof(data)))
+		error(1, 0, "recv: data mismatch");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (with_flowlabel) {
+		if (!cm)
+			error(1, 0, "recv: missing cmsg");
+		if (CMSG_NXTHDR(&msg, cm))
+			error(1, 0, "recv: too many cmsg");
+		if (cm->cmsg_level != SOL_IPV6 ||
+		    cm->cmsg_type != IPV6_FLOWINFO)
+			error(1, 0, "recv: unexpected cmsg level or type");
+
+		flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm));
+		fprintf(stderr, "recv with label %u\n", flowlabel);
+
+		if (expect != FLOWLABEL_WILDCARD && expect != flowlabel)
+			fprintf(stderr, "recv: incorrect flowlabel %u != %u\n",
+					flowlabel, expect);
+
+	} else {
+		fprintf(stderr, "recv without label\n");
+	}
+}
+
+static bool get_autoflowlabel_enabled(void)
+{
+	int fd, ret;
+	char val;
+
+	fd = open("/proc/sys/net/ipv6/auto_flowlabels", O_RDONLY);
+	if (fd == -1)
+		error(1, errno, "open sysctl");
+
+	ret = read(fd, &val, 1);
+	if (ret == -1)
+		error(1, errno, "read sysctl");
+	if (ret == 0)
+		error(1, 0, "read sysctl: 0");
+
+	if (close(fd))
+		error(1, errno, "close sysctl");
+
+	return val == '1';
+}
+
+static void flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_GET,
+		.flr_label = htonl(label),
+		.flr_flags = flags,
+		.flr_share = share,
+	};
+
+	/* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+	req.flr_dst.s6_addr[0] = 0xfd;
+	req.flr_dst.s6_addr[15] = 0x1;
+
+	if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req)))
+		error(1, errno, "setsockopt flowlabel get");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "l:")) != -1) {
+		switch (c) {
+		case 'l':
+			cfg_label = strtoul(optarg, NULL, 0);
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	struct sockaddr_in6 addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(8000),
+		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
+	};
+	const int one = 1;
+	int fdt, fdr;
+
+	parse_opts(argc, argv);
+
+	fdt = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	fdr = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	if (connect(fdt, (void *)&addr, sizeof(addr)))
+		error(1, errno, "connect");
+	if (bind(fdr, (void *)&addr, sizeof(addr)))
+		error(1, errno, "bind");
+
+	flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE);
+
+	if (setsockopt(fdr, SOL_IPV6, IPV6_FLOWINFO, &one, sizeof(one)))
+		error(1, errno, "setsockopt flowinfo");
+
+	if (get_autoflowlabel_enabled()) {
+		fprintf(stderr, "send no label: recv auto flowlabel\n");
+		do_send(fdt, false, 0);
+		do_recv(fdr, true, FLOWLABEL_WILDCARD);
+	} else {
+		fprintf(stderr, "send no label: recv no label (auto off)\n");
+		do_send(fdt, false, 0);
+		do_recv(fdr, false, 0);
+	}
+
+	fprintf(stderr, "send label\n");
+	do_send(fdt, true, cfg_label);
+	do_recv(fdr, true, cfg_label);
+
+	if (close(fdr))
+		error(1, errno, "close r");
+	if (close(fdt))
+		error(1, errno, "close t");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh
new file mode 100755
index 000000000000..d3bc6442704e
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for IPv6 flowlabels
+#
+# run in separate namespaces to avoid mgmt db conflicts betweent tests
+
+set -e
+
+echo "TEST management"
+./in_netns.sh ./ipv6_flowlabel_mgr
+
+echo "TEST datapath"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=0 && ./ipv6_flowlabel -l 1'
+
+echo "TEST datapath (with auto-flowlabels)"
+./in_netns.sh \
+  sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1'
+
+echo OK. All tests passed
diff --git a/tools/testing/selftests/net/ipv6_flowlabel_mgr.c b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
new file mode 100644
index 000000000000..af95b48acea9
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO_MGR */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR	32
+#endif
+
+/* from net/ipv6/ip6_flowlabel.c */
+#define FL_MIN_LINGER		6
+
+#define explain(x)							\
+	do { if (cfg_verbose) fprintf(stderr, "       " x "\n"); } while (0)
+
+#define __expect(x)							\
+	do {								\
+		if (!(x))						\
+			fprintf(stderr, "[OK]   " #x "\n");		\
+		else							\
+			error(1, 0, "[ERR]  " #x " (line %d)", __LINE__); \
+	} while (0)
+
+#define expect_pass(x)	__expect(x)
+#define expect_fail(x)	__expect(!(x))
+
+static bool cfg_long_running;
+static bool cfg_verbose;
+
+static int flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_GET,
+		.flr_label = htonl(label),
+		.flr_flags = flags,
+		.flr_share = share,
+	};
+
+	/* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+	req.flr_dst.s6_addr[0] = 0xfd;
+	req.flr_dst.s6_addr[15] = 0x1;
+
+	return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static int flowlabel_put(int fd, uint32_t label)
+{
+	struct in6_flowlabel_req req = {
+		.flr_action = IPV6_FL_A_PUT,
+		.flr_label = htonl(label),
+	};
+
+	return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static void run_tests(int fd)
+{
+	int wstatus;
+	pid_t pid;
+
+	explain("cannot get non-existent label");
+	expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+
+	explain("cannot put non-existent label");
+	expect_fail(flowlabel_put(fd, 1));
+
+	explain("cannot create label greater than 20 bits");
+	expect_fail(flowlabel_get(fd, 0x1FFFFF, IPV6_FL_S_ANY,
+				  IPV6_FL_F_CREATE));
+
+	explain("create a new label (FL_F_CREATE)");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+	explain("can get the label (without FL_F_CREATE)");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+	explain("can get it again with create flag set, too");
+	expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+	explain("cannot get it again with the exclusive (FL_FL_EXCL) flag");
+	expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY,
+					 IPV6_FL_F_CREATE | IPV6_FL_F_EXCL));
+	explain("can now put exactly three references");
+	expect_pass(flowlabel_put(fd, 1));
+	expect_pass(flowlabel_put(fd, 1));
+	expect_pass(flowlabel_put(fd, 1));
+	expect_fail(flowlabel_put(fd, 1));
+
+	explain("create a new exclusive label (FL_S_EXCL)");
+	expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+	explain("cannot get it again in non-exclusive mode");
+	expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY,  IPV6_FL_F_CREATE));
+	explain("cannot get it again in exclusive mode either");
+	expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+	expect_pass(flowlabel_put(fd, 2));
+
+	if (cfg_long_running) {
+		explain("cannot reuse the label, due to linger");
+		expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+					  IPV6_FL_F_CREATE));
+		explain("after sleep, can reuse");
+		sleep(FL_MIN_LINGER * 2 + 1);
+		expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+					  IPV6_FL_F_CREATE));
+	}
+
+	explain("create a new user-private label (FL_S_USER)");
+	expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, IPV6_FL_F_CREATE));
+	explain("cannot get it again in non-exclusive mode");
+	expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_ANY, 0));
+	explain("cannot get it again in exclusive mode");
+	expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_EXCL, 0));
+	explain("can get it again in user mode");
+	expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+	explain("child process can get it too, but not after setuid(nobody)");
+	pid = fork();
+	if (pid == -1)
+		error(1, errno, "fork");
+	if (!pid) {
+		expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+		if (setuid(USHRT_MAX))
+			fprintf(stderr, "[INFO] skip setuid child test\n");
+		else
+			expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+		exit(0);
+	}
+	if (wait(&wstatus) == -1)
+		error(1, errno, "wait");
+	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+		error(1, errno, "wait: unexpected child result");
+
+	explain("create a new process-private label (FL_S_PROCESS)");
+	expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, IPV6_FL_F_CREATE));
+	explain("can get it again");
+	expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+	explain("child process cannot can get it");
+	pid = fork();
+	if (pid == -1)
+		error(1, errno, "fork");
+	if (!pid) {
+		expect_fail(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+		exit(0);
+	}
+	if (wait(&wstatus) == -1)
+		error(1, errno, "wait");
+	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+		error(1, errno, "wait: unexpected child result");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "lv")) != -1) {
+		switch (c) {
+		case 'l':
+			cfg_long_running = true;
+			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int fd;
+
+	parse_opts(argc, argv);
+
+	fd = socket(PF_INET6, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	run_tests(fd);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From fe937ea12ec83da6c6bbbdb54a03062155318d0e Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Wed, 29 May 2019 10:48:14 +0100
Subject: selftests/bpf: fix compilation error for flow_dissector.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building the tools/testing/selftest/bpf subdirectory,
(running both a local directory "make" and a
"make -C tools/testing/selftests/bpf") I keep hitting the
following compilation error:

prog_tests/flow_dissector.c: In function ‘create_tap’:
prog_tests/flow_dissector.c:150:38: error: ‘IFF_NAPI’ undeclared (first
use in this function)
   .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS,
                                      ^
prog_tests/flow_dissector.c:150:38: note: each undeclared identifier is
reported only once for each function it appears in
prog_tests/flow_dissector.c:150:49: error: ‘IFF_NAPI_FRAGS’ undeclared

Adding include/uapi/linux/if_tun.h to tools/include/uapi/linux
resolves the problem and ensures the compilation of the file
does not depend on having up-to-date kernel headers locally.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/if_tun.h | 114 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 tools/include/uapi/linux/if_tun.h

(limited to 'tools')

diff --git a/tools/include/uapi/linux/if_tun.h b/tools/include/uapi/linux/if_tun.h
new file mode 100644
index 000000000000..454ae31b93c7
--- /dev/null
+++ b/tools/include/uapi/linux/if_tun.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ *  Universal TUN/TAP device driver.
+ *  Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ */
+
+#ifndef _UAPI__IF_TUN_H
+#define _UAPI__IF_TUN_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/filter.h>
+
+/* Read queue size */
+#define TUN_READQ_SIZE	500
+/* TUN device type flags: deprecated. Use IFF_TUN/IFF_TAP instead. */
+#define TUN_TUN_DEV 	IFF_TUN
+#define TUN_TAP_DEV	IFF_TAP
+#define TUN_TYPE_MASK   0x000f
+
+/* Ioctl defines */
+#define TUNSETNOCSUM  _IOW('T', 200, int) 
+#define TUNSETDEBUG   _IOW('T', 201, int) 
+#define TUNSETIFF     _IOW('T', 202, int) 
+#define TUNSETPERSIST _IOW('T', 203, int) 
+#define TUNSETOWNER   _IOW('T', 204, int)
+#define TUNSETLINK    _IOW('T', 205, int)
+#define TUNSETGROUP   _IOW('T', 206, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
+#define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
+#define TUNSETTXFILTER _IOW('T', 209, unsigned int)
+#define TUNGETIFF      _IOR('T', 210, unsigned int)
+#define TUNGETSNDBUF   _IOR('T', 211, int)
+#define TUNSETSNDBUF   _IOW('T', 212, int)
+#define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
+#define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
+#define TUNGETVNETHDRSZ _IOR('T', 215, int)
+#define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNSETQUEUE  _IOW('T', 217, int)
+#define TUNSETIFINDEX	_IOW('T', 218, unsigned int)
+#define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNGETVNETLE _IOR('T', 221, int)
+/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on
+ * little-endian hosts. Not all kernel configurations support them, but all
+ * configurations that support SET also support GET.
+ */
+#define TUNSETVNETBE _IOW('T', 222, int)
+#define TUNGETVNETBE _IOR('T', 223, int)
+#define TUNSETSTEERINGEBPF _IOR('T', 224, int)
+#define TUNSETFILTEREBPF _IOR('T', 225, int)
+#define TUNSETCARRIER _IOW('T', 226, int)
+#define TUNGETDEVNETNS _IO('T', 227)
+
+/* TUNSETIFF ifr flags */
+#define IFF_TUN		0x0001
+#define IFF_TAP		0x0002
+#define IFF_NAPI	0x0010
+#define IFF_NAPI_FRAGS	0x0020
+#define IFF_NO_PI	0x1000
+/* This flag has no real effect */
+#define IFF_ONE_QUEUE	0x2000
+#define IFF_VNET_HDR	0x4000
+#define IFF_TUN_EXCL	0x8000
+#define IFF_MULTI_QUEUE 0x0100
+#define IFF_ATTACH_QUEUE 0x0200
+#define IFF_DETACH_QUEUE 0x0400
+/* read-only flag */
+#define IFF_PERSIST	0x0800
+#define IFF_NOFILTER	0x1000
+
+/* Socket options */
+#define TUN_TX_TIMESTAMP 1
+
+/* Features for GSO (TUNSETOFFLOAD). */
+#define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */
+#define TUN_F_TSO4	0x02	/* I can handle TSO for IPv4 packets */
+#define TUN_F_TSO6	0x04	/* I can handle TSO for IPv6 packets */
+#define TUN_F_TSO_ECN	0x08	/* I can handle TSO with ECN bits. */
+#define TUN_F_UFO	0x10	/* I can handle UFO packets */
+
+/* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
+#define TUN_PKT_STRIP	0x0001
+struct tun_pi {
+	__u16  flags;
+	__be16 proto;
+};
+
+/*
+ * Filter spec (used for SETXXFILTER ioctls)
+ * This stuff is applicable only to the TAP (Ethernet) devices.
+ * If the count is zero the filter is disabled and the driver accepts
+ * all packets (promisc mode).
+ * If the filter is enabled in order to accept broadcast packets
+ * broadcast addr must be explicitly included in the addr list.
+ */
+#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */
+struct tun_filter {
+	__u16  flags; /* TUN_FLT_ flags see above */
+	__u16  count; /* Number of addresses */
+	__u8   addr[0][ETH_ALEN];
+};
+
+#endif /* _UAPI__IF_TUN_H */
-- 
cgit v1.2.3-59-g8ed1b


From 501b125a29f7cd8983960c5f3c80b337b1359cc3 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Wed, 29 May 2019 15:26:41 +0100
Subject: libbpf: prevent overwriting of log_level in bpf_object__load_progs()

There are two functions in libbpf that support passing a log_level
parameter for the verifier for loading programs:
bpf_object__load_xattr() and bpf_prog_load_xattr(). Both accept an
attribute object containing the log_level, and apply it to the programs
to load.

It turns out that to effectively load the programs, the latter function
eventually relies on the former. This was not taken into account when
adding support for log_level in bpf_object__load_xattr(), and the
log_level passed to bpf_prog_load_xattr() later gets overwritten with a
zero value, thus disabling verifier logs for the program in all cases:

bpf_prog_load_xattr()             // prog->log_level = attr1->log_level;
-> bpf_object__load()             // attr2->log_level = 0;
   -> bpf_object__load_xattr()    // <pass prog and attr2>
      -> bpf_object__load_progs() // prog->log_level = attr2->log_level;

Fix this by OR-ing the log_level in bpf_object__load_progs(), instead of
overwriting it.

v2: Fix commit log description (confusion on function names in v1).

Fixes: 60276f984998 ("libbpf: add bpf_object__load_xattr() API function to pass log_level")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ca4432f5b067..30cb08e2eb75 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2232,7 +2232,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 	for (i = 0; i < obj->nr_programs; i++) {
 		if (bpf_program__is_function_storage(&obj->programs[i], obj))
 			continue;
-		obj->programs[i].log_level = log_level;
+		obj->programs[i].log_level |= log_level;
 		err = bpf_program__load(&obj->programs[i],
 					obj->license,
 					obj->kern_version);
-- 
cgit v1.2.3-59-g8ed1b


From 8ca990ce0d402d5aaf05f7a33ff025fcbcbb5f93 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:03 -0700
Subject: libbpf: fix detection of corrupted BPF instructions section

Ensure that size of a section w/ BPF instruction is exactly a multiple
of BPF instruction size.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 30cb08e2eb75..c985a7916e35 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -349,8 +349,11 @@ static int
 bpf_program__init(void *data, size_t size, char *section_name, int idx,
 		  struct bpf_program *prog)
 {
-	if (size < sizeof(struct bpf_insn)) {
-		pr_warning("corrupted section '%s'\n", section_name);
+	const size_t bpf_insn_sz = sizeof(struct bpf_insn);
+
+	if (size == 0 || size % bpf_insn_sz) {
+		pr_warning("corrupted section '%s', size: %zu\n",
+			   section_name, size);
 		return -EINVAL;
 	}
 
@@ -376,9 +379,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
 			   section_name);
 		goto errout;
 	}
-	prog->insns_cnt = size / sizeof(struct bpf_insn);
-	memcpy(prog->insns, data,
-	       prog->insns_cnt * sizeof(struct bpf_insn));
+	prog->insns_cnt = size / bpf_insn_sz;
+	memcpy(prog->insns, data, size);
 	prog->idx = idx;
 	prog->instances.fds = NULL;
 	prog->instances.nr = -1;
-- 
cgit v1.2.3-59-g8ed1b


From be5c5d4e9d8cbdd7fcdcbc45e86e34bc0bd1cefd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:04 -0700
Subject: libbpf: preserve errno before calling into user callback

pr_warning ultimately may call into user-provided callback function,
which can clobber errno value, so we need to save it before that.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index c985a7916e35..40690b7e500e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -565,12 +565,12 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 	} else {
 		obj->efile.fd = open(obj->path, O_RDONLY);
 		if (obj->efile.fd < 0) {
-			char errmsg[STRERR_BUFSIZE];
-			char *cp = libbpf_strerror_r(errno, errmsg,
-						     sizeof(errmsg));
+			char errmsg[STRERR_BUFSIZE], *cp;
 
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 			pr_warning("failed to open %s: %s\n", obj->path, cp);
-			return -errno;
+			return err;
 		}
 
 		obj->efile.elf = elf_begin(obj->efile.fd,
-- 
cgit v1.2.3-59-g8ed1b


From 12ef5634a855eb642b4d28781526fbd830aa7f7a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:05 -0700
Subject: libbpf: simplify endianness check

Rewrite endianness check to use "more canonical" way, using
compiler-defined macros, similar to few other places in libbpf. It also
is more obvious and shorter.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 37 ++++++++++++-------------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 40690b7e500e..fb296eadc3f2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -607,31 +607,18 @@ errout:
 	return err;
 }
 
-static int
-bpf_object__check_endianness(struct bpf_object *obj)
-{
-	static unsigned int const endian = 1;
-
-	switch (obj->efile.ehdr.e_ident[EI_DATA]) {
-	case ELFDATA2LSB:
-		/* We are big endian, BPF obj is little endian. */
-		if (*(unsigned char const *)&endian != 1)
-			goto mismatch;
-		break;
-
-	case ELFDATA2MSB:
-		/* We are little endian, BPF obj is big endian. */
-		if (*(unsigned char const *)&endian != 0)
-			goto mismatch;
-		break;
-	default:
-		return -LIBBPF_ERRNO__ENDIAN;
-	}
-
-	return 0;
-
-mismatch:
-	pr_warning("Error: endianness mismatch.\n");
+static int bpf_object__check_endianness(struct bpf_object *obj)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+		return 0;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
+		return 0;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+	pr_warning("endianness mismatch.\n");
 	return -LIBBPF_ERRNO__ENDIAN;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c51829bb6e921337014d9ac3712b0b7e8cae9f3d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:06 -0700
Subject: libbpf: check map name retrieved from ELF

Validate there was no error retrieving symbol name corresponding to
a BPF map.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fb296eadc3f2..60ec9694923a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -920,6 +920,11 @@ bpf_object__init_maps(struct bpf_object *obj, int flags)
 		map_name = elf_strptr(obj->efile.elf,
 				      obj->efile.strtabidx,
 				      sym.st_name);
+		if (!map_name) {
+			pr_warning("failed to get map #%d name sym string for obj %s\n",
+				   map_idx, obj->path);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
 
 		obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
 		obj->maps[map_idx].offset = sym.st_value;
-- 
cgit v1.2.3-59-g8ed1b


From f102154d3158c8fe9713f03bd3492f9cb8409a5e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:07 -0700
Subject: libbpf: fix error code returned on corrupted ELF

All of libbpf errors are negative, except this one. Fix it.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 60ec9694923a..9f421206d3e7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1221,7 +1221,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 
 	if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
 		pr_warning("Corrupted ELF file: index of strtab invalid\n");
-		return LIBBPF_ERRNO__FORMAT;
+		return -LIBBPF_ERRNO__FORMAT;
 	}
 	if (btf_data) {
 		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
-- 
cgit v1.2.3-59-g8ed1b


From fba01a0689a98613f5ee3b1569fd9321f3fde7bd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:08 -0700
Subject: libbpf: use negative fd to specify missing BTF

0 is a valid FD, so it's better to initialize it to -1, as is done in
other places. Also, technically, BTF type ID 0 is valid (it's a VOID
type), so it's more reliable to check btf_fd, instead of
btf_key_type_id, to determine if there is any BTF associated with a map.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9f421206d3e7..b43913fc8b1b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1751,7 +1751,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 		create_attr.key_size = def->key_size;
 		create_attr.value_size = def->value_size;
 		create_attr.max_entries = def->max_entries;
-		create_attr.btf_fd = 0;
+		create_attr.btf_fd = -1;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
 		if (bpf_map_type__is_map_in_map(def->type) &&
@@ -1765,11 +1765,11 @@ bpf_object__create_maps(struct bpf_object *obj)
 		}
 
 		*pfd = bpf_create_map_xattr(&create_attr);
-		if (*pfd < 0 && create_attr.btf_key_type_id) {
+		if (*pfd < 0 && create_attr.btf_fd >= 0) {
 			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
 				   map->name, cp, errno);
-			create_attr.btf_fd = 0;
+			create_attr.btf_fd = -1;
 			create_attr.btf_key_type_id = 0;
 			create_attr.btf_value_type_id = 0;
 			map->btf_key_type_id = 0;
@@ -2053,6 +2053,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	char *log_buf;
 	int ret;
 
+	if (!insns || !insns_cnt)
+		return -EINVAL;
+
 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
 	load_attr.prog_type = prog->type;
 	load_attr.expected_attach_type = prog->expected_attach_type;
@@ -2063,7 +2066,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.license = license;
 	load_attr.kern_version = kern_version;
 	load_attr.prog_ifindex = prog->prog_ifindex;
-	load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
+	load_attr.prog_btf_fd = prog->btf_fd;
 	load_attr.func_info = prog->func_info;
 	load_attr.func_info_rec_size = prog->func_info_rec_size;
 	load_attr.func_info_cnt = prog->func_info_cnt;
@@ -2072,8 +2075,6 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.line_info_cnt = prog->line_info_cnt;
 	load_attr.log_level = prog->log_level;
 	load_attr.prog_flags = prog->prog_flags;
-	if (!load_attr.insns || !load_attr.insns_cnt)
-		return -EINVAL;
 
 retry_load:
 	log_buf = malloc(log_buf_size);
-- 
cgit v1.2.3-59-g8ed1b


From 7e8c328c4e96a0fc68d9f1264656683f7f1e12c1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:09 -0700
Subject: libbpf: simplify two pieces of logic

Extra check for type is unnecessary in first case.

Extra zeroing is unnecessary, as snprintf guarantees that it will
zero-terminate string.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b43913fc8b1b..33cd59bf9e72 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1430,8 +1430,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 				if (maps[map_idx].libbpf_type != type)
 					continue;
 				if (type != LIBBPF_MAP_UNSPEC ||
-				    (type == LIBBPF_MAP_UNSPEC &&
-				     maps[map_idx].offset == sym.st_value)) {
+				    maps[map_idx].offset == sym.st_value) {
 					pr_debug("relocation: find map %zd (%s) for insn %u\n",
 						 map_idx, maps[map_idx].name, insn_idx);
 					break;
@@ -2354,7 +2353,6 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 		snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
 			 (unsigned long)obj_buf,
 			 (unsigned long)obj_buf_sz);
-		tmp_name[sizeof(tmp_name) - 1] = '\0';
 		name = tmp_name;
 	}
 	pr_debug("loading object '%s' from buffer\n",
-- 
cgit v1.2.3-59-g8ed1b


From 76e1022b9653b5b5254e2f99ac467354b9c08de6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:10 -0700
Subject: libbpf: typo and formatting fixes

A bunch of typo and formatting fixes.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 33cd59bf9e72..a889925ecbcb 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -505,7 +505,7 @@ static struct bpf_object *bpf_object__new(const char *path,
 
 	obj->efile.fd = -1;
 	/*
-	 * Caller of this function should also calls
+	 * Caller of this function should also call
 	 * bpf_object__elf_finish() after data collection to return
 	 * obj_buf to user. If not, we should duplicate the buffer to
 	 * avoid user freeing them before elf finish.
@@ -574,8 +574,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 		}
 
 		obj->efile.elf = elf_begin(obj->efile.fd,
-				LIBBPF_ELF_C_READ_MMAP,
-				NULL);
+					   LIBBPF_ELF_C_READ_MMAP, NULL);
 	}
 
 	if (!obj->efile.elf) {
@@ -594,9 +593,9 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 	ep = &obj->efile.ehdr;
 
 	/* Old LLVM set e_machine to EM_NONE */
-	if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) {
-		pr_warning("%s is not an eBPF object file\n",
-			obj->path);
+	if (ep->e_type != ET_REL ||
+	    (ep->e_machine && ep->e_machine != EM_BPF)) {
+		pr_warning("%s is not an eBPF object file\n", obj->path);
 		err = -LIBBPF_ERRNO__FORMAT;
 		goto errout;
 	}
@@ -1438,7 +1437,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 			}
 
 			if (map_idx >= nr_maps) {
-				pr_warning("bpf relocation: map_idx %d large than %d\n",
+				pr_warning("bpf relocation: map_idx %d larger than %d\n",
 					   (int)map_idx, (int)nr_maps - 1);
 				return -LIBBPF_ERRNO__RELOC;
 			}
@@ -1797,7 +1796,7 @@ err_out:
 			}
 		}
 
-		pr_debug("create map %s: fd=%d\n", map->name, *pfd);
+		pr_debug("created map %s: fd=%d\n", map->name, *pfd);
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 399dc65e9ca0c35c8ceea7130b25e7be8f229a52 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 May 2019 10:36:11 -0700
Subject: libbpf: reduce unnecessary line wrapping

There are a bunch of lines of code or comments that are unnecessary
wrapped into multi-lines. Fix that without violating any code
guidelines.

Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 52 ++++++++++++++++----------------------------------
 1 file changed, 16 insertions(+), 36 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index a889925ecbcb..ba89d9727137 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -497,8 +497,7 @@ static struct bpf_object *bpf_object__new(const char *path,
 
 	strcpy(obj->path, path);
 	/* Using basename() GNU version which doesn't modify arg. */
-	strncpy(obj->name, basename((void *)path),
-		sizeof(obj->name) - 1);
+	strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1);
 	end = strchr(obj->name, '.');
 	if (end)
 		*end = 0;
@@ -578,15 +577,13 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 	}
 
 	if (!obj->efile.elf) {
-		pr_warning("failed to open %s as ELF file\n",
-				obj->path);
+		pr_warning("failed to open %s as ELF file\n", obj->path);
 		err = -LIBBPF_ERRNO__LIBELF;
 		goto errout;
 	}
 
 	if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
-		pr_warning("failed to get EHDR from %s\n",
-				obj->path);
+		pr_warning("failed to get EHDR from %s\n", obj->path);
 		err = -LIBBPF_ERRNO__FORMAT;
 		goto errout;
 	}
@@ -622,18 +619,15 @@ static int bpf_object__check_endianness(struct bpf_object *obj)
 }
 
 static int
-bpf_object__init_license(struct bpf_object *obj,
-			 void *data, size_t size)
+bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
 {
-	memcpy(obj->license, data,
-	       min(size, sizeof(obj->license) - 1));
+	memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
 	pr_debug("license of %s is %s\n", obj->path, obj->license);
 	return 0;
 }
 
 static int
-bpf_object__init_kversion(struct bpf_object *obj,
-			  void *data, size_t size)
+bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
 {
 	__u32 kver;
 
@@ -643,8 +637,7 @@ bpf_object__init_kversion(struct bpf_object *obj,
 	}
 	memcpy(&kver, data, sizeof(kver));
 	obj->kern_version = kver;
-	pr_debug("kernel version of %s is %x\n", obj->path,
-		 obj->kern_version);
+	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
 	return 0;
 }
 
@@ -800,8 +793,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 	def->key_size = sizeof(int);
 	def->value_size = data->d_size;
 	def->max_entries = 1;
-	def->map_flags = type == LIBBPF_MAP_RODATA ?
-			 BPF_F_RDONLY_PROG : 0;
+	def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
 	if (data_buff) {
 		*data_buff = malloc(data->d_size);
 		if (!*data_buff) {
@@ -816,8 +808,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 	return 0;
 }
 
-static int
-bpf_object__init_maps(struct bpf_object *obj, int flags)
+static int bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
 	int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
 	bool strict = !(flags & MAPS_RELAX_COMPAT);
@@ -1098,8 +1089,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 
 	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
 	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
-		pr_warning("failed to get e_shstrndx from %s\n",
-			   obj->path);
+		pr_warning("failed to get e_shstrndx from %s\n", obj->path);
 		return -LIBBPF_ERRNO__FORMAT;
 	}
 
@@ -1340,8 +1330,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	size_t nr_maps = obj->nr_maps;
 	int i, nrels;
 
-	pr_debug("collecting relocating info for: '%s'\n",
-		 prog->section_name);
+	pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
 	nrels = shdr->sh_size / shdr->sh_entsize;
 
 	prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
@@ -1366,9 +1355,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
-		if (!gelf_getsym(symbols,
-				 GELF_R_SYM(rel.r_info),
-				 &sym)) {
+		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
 			pr_warning("relocation: symbol %"PRIx64" not found\n",
 				   GELF_R_SYM(rel.r_info));
 			return -LIBBPF_ERRNO__FORMAT;
@@ -1817,18 +1804,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,
 	if (btf_prog_info) {
 		/*
 		 * Some info has already been found but has problem
-		 * in the last btf_ext reloc.  Must have to error
-		 * out.
+		 * in the last btf_ext reloc. Must have to error out.
 		 */
 		pr_warning("Error in relocating %s for sec %s.\n",
 			   info_name, prog->section_name);
 		return err;
 	}
 
-	/*
-	 * Have problem loading the very first info.  Ignore
-	 * the rest.
-	 */
+	/* Have problem loading the very first info. Ignore the rest. */
 	pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n",
 		   info_name, prog->section_name, info_name);
 	return 0;
@@ -2032,9 +2015,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 			return -LIBBPF_ERRNO__RELOC;
 		}
 
-		err = bpf_program__collect_reloc(prog,
-						 shdr, data,
-						 obj);
+		err = bpf_program__collect_reloc(prog, shdr, data, obj);
 		if (err)
 			return err;
 	}
@@ -2354,8 +2335,7 @@ struct bpf_object *bpf_object__open_buffer(void *obj_buf,
 			 (unsigned long)obj_buf_sz);
 		name = tmp_name;
 	}
-	pr_debug("loading object '%s' from buffer\n",
-		 name);
+	pr_debug("loading object '%s' from buffer\n", name);
 
 	return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 24ec483cec981618f8a4782a36d1e3f319d42cad Mon Sep 17 00:00:00 2001
From: Kevin 'ldir' Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Tue, 28 May 2019 17:03:50 +0000
Subject: net: sched: Introduce act_ctinfo action
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ctinfo is a new tc filter action module.  It is designed to restore
information contained in firewall conntrack marks to other packet fields
and is typically used on packet ingress paths.  At present it has two
independent sub-functions or operating modes, DSCP restoration mode &
skb mark restoration mode.

The DSCP restore mode:

This mode copies DSCP values that have been placed in the firewall
conntrack mark back into the IPv4/v6 diffserv fields of relevant
packets.

The DSCP restoration is intended for use and has been found useful for
restoring ingress classifications based on egress classifications across
links that bleach or otherwise change DSCP, typically home ISP Internet
links.  Restoring DSCP on ingress on the WAN link allows qdiscs such as
but by no means limited to CAKE to shape inbound packets according to
policies that are easier to set & mark on egress.

Ingress classification is traditionally a challenging task since
iptables rules haven't yet run and tc filter/eBPF programs are pre-NAT
lookups, hence are unable to see internal IPv4 addresses as used on the
typical home masquerading gateway.  Thus marking the connection in some
manner on egress for later restoration of classification on ingress is
easier to implement.

Parameters related to DSCP restore mode:

dscpmask - a 32 bit mask of 6 contiguous bits and indicate bits of the
conntrack mark field contain the DSCP value to be restored.

statemask - a 32 bit mask of (usually) 1 bit length, outside the area
specified by dscpmask.  This represents a conditional operation flag
whereby the DSCP is only restored if the flag is set.  This is useful to
implement a 'one shot' iptables based classification where the
'complicated' iptables rules are only run once to classify the
connection on initial (egress) packet and subsequent packets are all
marked/restored with the same DSCP.  A mask of zero disables the
conditional behaviour ie. the conntrack mark DSCP bits are always
restored to the ip diffserv field (assuming the conntrack entry is found
& the skb is an ipv4/ipv6 type)

e.g. dscpmask 0xfc000000 statemask 0x01000000

|----0xFC----conntrack mark----000000---|
| Bits 31-26 | bit 25 | bit24 |~~~ Bit 0|
| DSCP       | unused | flag  |unused   |
|-----------------------0x01---000000---|
      |                   |
      |                   |
      ---|             Conditional flag
         v             only restore if set
|-ip diffserv-|
| 6 bits      |
|-------------|

The skb mark restore mode (cpmark):

This mode copies the firewall conntrack mark to the skb's mark field.
It is completely the functional equivalent of the existing act_connmark
action with the additional feature of being able to apply a mask to the
restored value.

Parameters related to skb mark restore mode:

mask - a 32 bit mask applied to the firewall conntrack mark to mask out
bits unwanted for restoration.  This can be useful where the conntrack
mark is being used for different purposes by different applications.  If
not specified and by default the whole mark field is copied (i.e.
default mask of 0xffffffff)

e.g. mask 0x00ffffff to mask out the top 8 bits being used by the
aforementioned DSCP restore mode.

|----0x00----conntrack mark----ffffff---|
| Bits 31-24 |                          |
| DSCP & flag|      some value here     |
|---------------------------------------|
			|
			|
			v
|------------skb mark-------------------|
|            |                          |
|  zeroed    |                          |
|---------------------------------------|

Overall parameters:

zone - conntrack zone

control - action related control (reclassify | pipe | drop | continue |
ok | goto chain <CHAIN_INDEX>)

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_ctinfo.h            |  28 +++
 include/uapi/linux/pkt_cls.h              |   1 +
 include/uapi/linux/tc_act/tc_ctinfo.h     |  34 +++
 net/sched/Kconfig                         |  17 ++
 net/sched/Makefile                        |   1 +
 net/sched/act_ctinfo.c                    | 396 ++++++++++++++++++++++++++++++
 tools/testing/selftests/tc-testing/config |   1 +
 7 files changed, 478 insertions(+)
 create mode 100644 include/net/tc_act/tc_ctinfo.h
 create mode 100644 include/uapi/linux/tc_act/tc_ctinfo.h
 create mode 100644 net/sched/act_ctinfo.c

(limited to 'tools')

diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h
new file mode 100644
index 000000000000..d6a688571672
--- /dev/null
+++ b/include/net/tc_act/tc_ctinfo.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_CTINFO_H
+#define __NET_TC_CTINFO_H
+
+#include <net/act_api.h>
+
+struct tcf_ctinfo_params {
+	struct rcu_head rcu;
+	struct net *net;
+	u32 dscpmask;
+	u32 dscpstatemask;
+	u32 cpmarkmask;
+	u16 zone;
+	u8 mode;
+	u8 dscpmaskshift;
+};
+
+struct tcf_ctinfo {
+	struct tc_action common;
+	struct tcf_ctinfo_params __rcu *params;
+	u64 stats_dscp_set;
+	u64 stats_dscp_error;
+	u64 stats_cpmark_set;
+};
+
+#define to_ctinfo(a) ((struct tcf_ctinfo *)a)
+
+#endif /* __NET_TC_CTINFO_H */
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 51a0496f78ea..a93680fc4bfa 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -105,6 +105,7 @@ enum tca_id {
 	TCA_ID_IFE = TCA_ACT_IFE,
 	TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
 	/* other actions go here */
+	TCA_ID_CTINFO,
 	__TCA_ID_MAX = 255
 };
 
diff --git a/include/uapi/linux/tc_act/tc_ctinfo.h b/include/uapi/linux/tc_act/tc_ctinfo.h
new file mode 100644
index 000000000000..da803e05a89b
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_ctinfo.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CTINFO_H
+#define __UAPI_TC_CTINFO_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+struct tc_ctinfo {
+	tc_gen;
+};
+
+enum {
+	TCA_CTINFO_UNSPEC,
+	TCA_CTINFO_PAD,
+	TCA_CTINFO_TM,
+	TCA_CTINFO_ACT,
+	TCA_CTINFO_ZONE,
+	TCA_CTINFO_PARMS_DSCP_MASK,
+	TCA_CTINFO_PARMS_DSCP_STATEMASK,
+	TCA_CTINFO_PARMS_CPMARK_MASK,
+	TCA_CTINFO_STATS_DSCP_SET,
+	TCA_CTINFO_STATS_DSCP_ERROR,
+	TCA_CTINFO_STATS_CPMARK_SET,
+	__TCA_CTINFO_MAX
+};
+
+#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1)
+
+enum {
+	CTINFO_MODE_DSCP	= BIT(0),
+	CTINFO_MODE_CPMARK	= BIT(1)
+};
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2c72d95c3050..d104f7ee26c7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -877,6 +877,23 @@ config NET_ACT_CONNMARK
 	  To compile this code as a module, choose M here: the
 	  module will be called act_connmark.
 
+config NET_ACT_CTINFO
+        tristate "Netfilter Connection Mark Actions"
+        depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+        depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+        help
+	  Say Y here to allow transfer of a connmark stored information.
+	  Current actions transfer connmark stored DSCP into
+	  ipv4/v6 diffserv and/or to transfer connmark to packet
+	  mark.  Both are useful for restoring egress based marks
+	  back onto ingress connections for qdisc priority mapping
+	  purposes.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_ctinfo.
+
 config NET_ACT_SKBMOD
         tristate "skb data modification action"
         depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8a40431d7b5c..d54bfcbd7981 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_NET_ACT_CSUM)	+= act_csum.o
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
+obj-$(CONFIG_NET_ACT_CTINFO)	+= act_ctinfo.o
 obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
new file mode 100644
index 000000000000..926109139a81
--- /dev/null
+++ b/net/sched/act_ctinfo.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* net/sched/act_ctinfo.c  netfilter ctinfo connmark actions
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+#include <uapi/linux/tc_act/tc_ctinfo.h>
+#include <net/tc_act/tc_ctinfo.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static struct tc_action_ops act_ctinfo_ops;
+static unsigned int ctinfo_net_id;
+
+static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+				struct tcf_ctinfo_params *cp,
+				struct sk_buff *skb, int wlen, int proto)
+{
+	u8 dscp, newdscp;
+
+	newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
+		     ~INET_ECN_MASK;
+
+	switch (proto) {
+	case NFPROTO_IPV4:
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK;
+		if (dscp != newdscp) {
+			if (likely(!skb_try_make_writable(skb, wlen))) {
+				ipv4_change_dsfield(ip_hdr(skb),
+						    INET_ECN_MASK,
+						    newdscp);
+				ca->stats_dscp_set++;
+			} else {
+				ca->stats_dscp_error++;
+			}
+		}
+		break;
+	case NFPROTO_IPV6:
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK;
+		if (dscp != newdscp) {
+			if (likely(!skb_try_make_writable(skb, wlen))) {
+				ipv6_change_dsfield(ipv6_hdr(skb),
+						    INET_ECN_MASK,
+						    newdscp);
+				ca->stats_dscp_set++;
+			} else {
+				ca->stats_dscp_error++;
+			}
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+				  struct tcf_ctinfo_params *cp,
+				  struct sk_buff *skb)
+{
+	ca->stats_cpmark_set++;
+	skb->mark = ct->mark & cp->cpmarkmask;
+}
+
+static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	const struct nf_conntrack_tuple_hash *thash = NULL;
+	struct tcf_ctinfo *ca = to_ctinfo(a);
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_zone zone;
+	enum ip_conntrack_info ctinfo;
+	struct tcf_ctinfo_params *cp;
+	struct nf_conn *ct;
+	int proto, wlen;
+	int action;
+
+	cp = rcu_dereference_bh(ca->params);
+
+	tcf_lastuse_update(&ca->tcf_tm);
+	bstats_update(&ca->tcf_bstats, skb);
+	action = READ_ONCE(ca->tcf_action);
+
+	wlen = skb_network_offset(skb);
+	if (tc_skb_protocol(skb) == htons(ETH_P_IP)) {
+		wlen += sizeof(struct iphdr);
+		if (!pskb_may_pull(skb, wlen))
+			goto out;
+
+		proto = NFPROTO_IPV4;
+	} else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) {
+		wlen += sizeof(struct ipv6hdr);
+		if (!pskb_may_pull(skb, wlen))
+			goto out;
+
+		proto = NFPROTO_IPV6;
+	} else {
+		goto out;
+	}
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct) { /* look harder, usually ingress */
+		if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+				       proto, cp->net, &tuple))
+			goto out;
+		zone.id = cp->zone;
+		zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+		thash = nf_conntrack_find_get(cp->net, &zone, &tuple);
+		if (!thash)
+			goto out;
+
+		ct = nf_ct_tuplehash_to_ctrack(thash);
+	}
+
+	if (cp->mode & CTINFO_MODE_DSCP)
+		if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
+			tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
+
+	if (cp->mode & CTINFO_MODE_CPMARK)
+		tcf_ctinfo_cpmark_set(ct, ca, cp, skb);
+
+	if (thash)
+		nf_ct_put(ct);
+out:
+	return action;
+}
+
+static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
+	[TCA_CTINFO_ACT]		  = { .len = sizeof(struct
+							    tc_ctinfo) },
+	[TCA_CTINFO_ZONE]		  = { .type = NLA_U16 },
+	[TCA_CTINFO_PARMS_DSCP_MASK]	  = { .type = NLA_U32 },
+	[TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 },
+	[TCA_CTINFO_PARMS_CPMARK_MASK]	  = { .type = NLA_U32 },
+};
+
+static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
+			   struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+	struct nlattr *tb[TCA_CTINFO_MAX + 1];
+	struct tcf_ctinfo_params *cp_new;
+	struct tcf_chain *goto_ch = NULL;
+	u32 dscpmask = 0, dscpstatemask;
+	struct tc_ctinfo *actparm;
+	struct tcf_ctinfo *ci;
+	u8 dscpmaskshift;
+	int ret = 0, err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, NULL);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_CTINFO_ACT])
+		return -EINVAL;
+	actparm = nla_data(tb[TCA_CTINFO_ACT]);
+
+	/* do some basic validation here before dynamically allocating things */
+	/* that we would otherwise have to clean up.			      */
+	if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+		dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]);
+		/* need contiguous 6 bit mask */
+		dscpmaskshift = dscpmask ? __ffs(dscpmask) : 0;
+		if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f)
+			return -EINVAL;
+		dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ?
+			nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0;
+		/* mask & statemask must not overlap */
+		if (dscpmask & dscpstatemask)
+			return -EINVAL;
+	}
+
+	/* done the validation:now to the actual action allocation */
+	err = tcf_idr_check_alloc(tn, &actparm->index, a, bind);
+	if (!err) {
+		ret = tcf_idr_create(tn, actparm->index, est, a,
+				     &act_ctinfo_ops, bind, false);
+		if (ret) {
+			tcf_idr_cleanup(tn, actparm->index);
+			return ret;
+		}
+	} else if (err > 0) {
+		if (bind) /* don't override defaults */
+			return 0;
+		if (!ovr) {
+			tcf_idr_release(*a, bind);
+			return -EEXIST;
+		}
+	} else {
+		return err;
+	}
+
+	err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	ci = to_ctinfo(*a);
+
+	cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL);
+	if (unlikely(!cp_new)) {
+		err = -ENOMEM;
+		goto put_chain;
+	}
+
+	cp_new->net = net;
+	cp_new->zone = tb[TCA_CTINFO_ZONE] ?
+			nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0;
+	if (dscpmask) {
+		cp_new->dscpmask = dscpmask;
+		cp_new->dscpmaskshift = dscpmaskshift;
+		cp_new->dscpstatemask = dscpstatemask;
+		cp_new->mode |= CTINFO_MODE_DSCP;
+	} else {
+		cp_new->mode &= ~CTINFO_MODE_DSCP;
+	}
+
+	if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
+		cp_new->cpmarkmask =
+				nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
+		cp_new->mode |= CTINFO_MODE_CPMARK;
+	} else {
+		cp_new->mode &= ~CTINFO_MODE_CPMARK;
+	}
+
+	spin_lock_bh(&ci->tcf_lock);
+	goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch);
+	rcu_swap_protected(ci->params, cp_new,
+			   lockdep_is_held(&ci->tcf_lock));
+	spin_unlock_bh(&ci->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+	if (cp_new)
+		kfree_rcu(cp_new, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
+
+	return ret;
+
+put_chain:
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_ctinfo *ci = to_ctinfo(a);
+	struct tc_ctinfo opt = {
+		.index   = ci->tcf_index,
+		.refcnt  = refcount_read(&ci->tcf_refcnt) - ref,
+		.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
+	};
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_ctinfo_params *cp;
+	struct tcf_t t;
+
+	spin_lock_bh(&ci->tcf_lock);
+	cp = rcu_dereference_protected(ci->params,
+				       lockdep_is_held(&ci->tcf_lock));
+
+	tcf_tm_dump(&t, &ci->tcf_tm);
+	if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	opt.action = ci->tcf_action;
+	if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone))
+		goto nla_put_failure;
+
+	if (cp->mode & CTINFO_MODE_DSCP) {
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK,
+				cp->dscpmask))
+			goto nla_put_failure;
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK,
+				cp->dscpstatemask))
+			goto nla_put_failure;
+	}
+
+	if (cp->mode & CTINFO_MODE_CPMARK) {
+		if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK,
+				cp->cpmarkmask))
+			goto nla_put_failure;
+	}
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
+			      ci->stats_dscp_set, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
+			      ci->stats_dscp_error, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
+			      ci->stats_cpmark_set, TCA_CTINFO_PAD))
+		goto nla_put_failure;
+
+	spin_unlock_bh(&ci->tcf_lock);
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&ci->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops,
+			     struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_ctinfo_ops = {
+	.kind	= "ctinfo",
+	.id	= TCA_ID_CTINFO,
+	.owner	= THIS_MODULE,
+	.act	= tcf_ctinfo_act,
+	.dump	= tcf_ctinfo_dump,
+	.init	= tcf_ctinfo_init,
+	.walk	= tcf_ctinfo_walker,
+	.lookup	= tcf_ctinfo_search,
+	.size	= sizeof(struct tcf_ctinfo),
+};
+
+static __net_init int ctinfo_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+	return tc_action_net_init(tn, &act_ctinfo_ops);
+}
+
+static void __net_exit ctinfo_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, ctinfo_net_id);
+}
+
+static struct pernet_operations ctinfo_net_ops = {
+	.init		= ctinfo_init_net,
+	.exit_batch	= ctinfo_exit_net,
+	.id		= &ctinfo_net_id,
+	.size		= sizeof(struct tc_action_net),
+};
+
+static int __init ctinfo_init_module(void)
+{
+	return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+static void __exit ctinfo_cleanup_module(void)
+{
+	tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+module_init(ctinfo_init_module);
+module_exit(ctinfo_cleanup_module);
+MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>");
+MODULE_DESCRIPTION("Connection tracking mark actions");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 203302065458..b235efd55367 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -38,6 +38,7 @@ CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_VLAN=m
 CONFIG_NET_ACT_BPF=m
 CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_CTINFO=m
 CONFIG_NET_ACT_SKBMOD=m
 CONFIG_NET_ACT_IFE=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
-- 
cgit v1.2.3-59-g8ed1b


From 10fbcdd12aa24fdb4b357a39abc03686d15f04fd Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 29 May 2019 12:34:01 -0400
Subject: selftests/net: add TFO key rotation selftest

Demonstrate how the primary and backup TFO keys can be rotated while
minimizing the number of client cookies that are rejected.

Signed-off-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/.gitignore             |   1 +
 tools/testing/selftests/net/Makefile               |   2 +
 .../selftests/net/tcp_fastopen_backup_key.c        | 336 +++++++++++++++++++++
 .../selftests/net/tcp_fastopen_backup_key.sh       |  55 ++++
 4 files changed, 394 insertions(+)
 create mode 100644 tools/testing/selftests/net/tcp_fastopen_backup_key.c
 create mode 100755 tools/testing/selftests/net/tcp_fastopen_backup_key.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 99a4e41d5249..4ce0bc1612f5 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -20,3 +20,4 @@ ip_defrag
 so_txtime
 flowlabel
 flowlabel_mgr
+tcp_fastopen_backup_key
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8343fb9d8a46..9a275d932fd5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -10,12 +10,14 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
 TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
 TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
 TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
 TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += tcp_fastopen_backup_key
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
new file mode 100644
index 000000000000..58bb77d9e7e1
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test key rotation for TFO.
+ * New keys are 'rotated' in two steps:
+ * 1) Add new key as the 'backup' key 'behind' the primary key
+ * 2) Make new key the primary by swapping the backup and primary keys
+ *
+ * The rotation is done in stages using multiple sockets bound
+ * to the same port via SO_REUSEPORT. This simulates key rotation
+ * behind say a load balancer. We verify that across the rotation
+ * there are no cases in which a cookie is not accepted by verifying
+ * that TcpExtTCPFastOpenPassiveFail remains 0.
+ */
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <netinet/tcp.h>
+#include <fcntl.h>
+#include <time.h>
+
+#ifndef TCP_FASTOPEN_KEY
+#define TCP_FASTOPEN_KEY 33
+#endif
+
+#define N_LISTEN 10
+#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
+#define KEY_LENGTH 16
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+static bool do_ipv6;
+static bool do_sockopt;
+static bool do_rotate;
+static int key_len = KEY_LENGTH;
+static int rcv_fds[N_LISTEN];
+static int proc_fd;
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const int PORT = 8891;
+
+static void get_keys(int fd, uint32_t *keys)
+{
+	char buf[128];
+	int len = KEY_LENGTH * 2;
+
+	if (do_sockopt) {
+		if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len))
+			error(1, errno, "Unable to get key");
+		return;
+	}
+	lseek(proc_fd, 0, SEEK_SET);
+	if (read(proc_fd, buf, sizeof(buf)) <= 0)
+		error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY);
+	if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2,
+	    keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8)
+		error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY);
+}
+
+static void set_keys(int fd, uint32_t *keys)
+{
+	char buf[128];
+
+	if (do_sockopt) {
+		if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys,
+		    key_len))
+			error(1, errno, "Unable to set key");
+		return;
+	}
+	if (do_rotate)
+		snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x",
+			 keys[0], keys[1], keys[2], keys[3], keys[4], keys[5],
+			 keys[6], keys[7]);
+	else
+		snprintf(buf, 128, "%08x-%08x-%08x-%08x",
+			 keys[0], keys[1], keys[2], keys[3]);
+	lseek(proc_fd, 0, SEEK_SET);
+	if (write(proc_fd, buf, sizeof(buf)) <= 0)
+		error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY);
+}
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds)
+{
+	struct sockaddr_in  addr4 = {0};
+	struct sockaddr_in6 addr6 = {0};
+	struct sockaddr *addr;
+	int opt = 1, i, sz;
+	int qlen = 100;
+	uint32_t keys[8];
+
+	switch (family) {
+	case AF_INET:
+		addr4.sin_family = family;
+		addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		addr4.sin_port = htons(PORT);
+		sz = sizeof(addr4);
+		addr = (struct sockaddr *)&addr4;
+		break;
+	case AF_INET6:
+		addr6.sin6_family = AF_INET6;
+		addr6.sin6_addr = in6addr_any;
+		addr6.sin6_port = htons(PORT);
+		sz = sizeof(addr6);
+		addr = (struct sockaddr *)&addr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return;
+	}
+	for (i = 0; i < ARRAY_SIZE(keys); i++)
+		keys[i] = rand();
+	for (i = 0; i < N_LISTEN; i++) {
+		rcv_fds[i] = socket(family, proto, 0);
+		if (rcv_fds[i] < 0)
+			error(1, errno, "failed to create receive socket");
+		if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+		if (bind(rcv_fds[i], addr, sz))
+			error(1, errno, "failed to bind receive socket");
+		if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen,
+			       sizeof(qlen)))
+			error(1, errno, "failed to set TCP_FASTOPEN");
+		set_keys(rcv_fds[i], keys);
+		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+			error(1, errno, "failed to listen on receive port");
+	}
+}
+
+static int connect_and_send(int family, int proto)
+{
+	struct sockaddr_in  saddr4 = {0};
+	struct sockaddr_in  daddr4 = {0};
+	struct sockaddr_in6 saddr6 = {0};
+	struct sockaddr_in6 daddr6 = {0};
+	struct sockaddr *saddr, *daddr;
+	int fd, sz, ret;
+	char data[1];
+
+	switch (family) {
+	case AF_INET:
+		saddr4.sin_family = AF_INET;
+		saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+		saddr4.sin_port = 0;
+
+		daddr4.sin_family = AF_INET;
+		if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+			error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+		daddr4.sin_port = htons(PORT);
+
+		sz = sizeof(saddr4);
+		saddr = (struct sockaddr *)&saddr4;
+		daddr = (struct sockaddr *)&daddr4;
+		break;
+	case AF_INET6:
+		saddr6.sin6_family = AF_INET6;
+		saddr6.sin6_addr = in6addr_any;
+
+		daddr6.sin6_family = AF_INET6;
+		if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+			error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+		daddr6.sin6_port = htons(PORT);
+
+		sz = sizeof(saddr6);
+		saddr = (struct sockaddr *)&saddr6;
+		daddr = (struct sockaddr *)&daddr6;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+		/* clang does not recognize error() above as terminating
+		 * the program, so it complains that saddr, daddr, sz are
+		 * not initialized when this code path is taken. Silence it.
+		 */
+		return -1;
+	}
+	fd = socket(family, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+	if (bind(fd, saddr, sz))
+		error(1, errno, "failed to bind send socket");
+	data[0] = 'a';
+	ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz);
+	if (ret != 1)
+		error(1, errno, "failed to sendto");
+
+	return fd;
+}
+
+static bool is_listen_fd(int fd)
+{
+	int i;
+
+	for (i = 0; i < N_LISTEN; i++) {
+		if (rcv_fds[i] == fd)
+			return true;
+	}
+	return false;
+}
+
+static int rotate_key(int fd)
+{
+	static int iter;
+	static uint32_t new_key[4];
+	uint32_t keys[8];
+	uint32_t tmp_key[4];
+	int i;
+	int len = KEY_LENGTH * 2;
+
+	if (iter < N_LISTEN) {
+		/* first set new key as backups */
+		if (iter == 0) {
+			for (i = 0; i < ARRAY_SIZE(new_key); i++)
+				new_key[i] = rand();
+		}
+		get_keys(fd, keys);
+		memcpy(keys + 4, new_key, KEY_LENGTH);
+		set_keys(fd, keys);
+	} else {
+		/* swap the keys */
+		get_keys(fd, keys);
+		memcpy(tmp_key, keys + 4, KEY_LENGTH);
+		memcpy(keys + 4, keys, KEY_LENGTH);
+		memcpy(keys, tmp_key, KEY_LENGTH);
+		set_keys(fd, keys);
+	}
+	if (++iter >= (N_LISTEN * 2))
+		iter = 0;
+}
+
+static void run_one_test(int family)
+{
+	struct epoll_event ev;
+	int i, send_fd;
+	int n_loops = 10000;
+	int rotate_key_fd = 0;
+	int key_rotate_interval = 50;
+	int fd, epfd;
+	char buf[1];
+
+	build_rcv_fd(family, SOCK_STREAM, rcv_fds);
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+	ev.events = EPOLLIN;
+	for (i = 0; i < N_LISTEN; i++) {
+		ev.data.fd = rcv_fds[i];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+	while (n_loops--) {
+		send_fd = connect_and_send(family, SOCK_STREAM);
+		if (do_rotate && ((n_loops % key_rotate_interval) == 0)) {
+			rotate_key(rcv_fds[rotate_key_fd]);
+			if (++rotate_key_fd >= N_LISTEN)
+				rotate_key_fd = 0;
+		}
+		while (1) {
+			i = epoll_wait(epfd, &ev, 1, -1);
+			if (i < 0)
+				error(1, errno, "epoll_wait failed");
+			if (is_listen_fd(ev.data.fd)) {
+				fd = accept(ev.data.fd, NULL, NULL);
+				if (fd < 0)
+					error(1, errno, "failed to accept");
+				ev.data.fd = fd;
+				if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+					error(1, errno, "failed epoll add");
+				continue;
+			}
+			i = recv(ev.data.fd, buf, sizeof(buf), 0);
+			if (i != 1)
+				error(1, errno, "failed recv data");
+			if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL))
+				error(1, errno, "failed epoll del");
+			close(ev.data.fd);
+			break;
+		}
+		close(send_fd);
+	}
+	for (i = 0; i < N_LISTEN; i++)
+		close(rcv_fds[i]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "46sr")) != -1) {
+		switch (c) {
+		case '4':
+			do_ipv6 = false;
+			break;
+		case '6':
+			do_ipv6 = true;
+			break;
+		case 's':
+			do_sockopt = true;
+			break;
+		case 'r':
+			do_rotate = true;
+			key_len = KEY_LENGTH * 2;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR);
+	if (proc_fd < 0)
+		error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY);
+	srand(time(NULL));
+	if (do_ipv6)
+		run_one_test(AF_INET6);
+	else
+		run_one_test(AF_INET);
+	close(proc_fd);
+	fprintf(stderr, "PASS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
new file mode 100755
index 000000000000..41476399e184
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# rotate TFO keys for ipv4/ipv6 and verify that the client does
+# not present an invalid cookie.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+	ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \
+		>/dev/null 2>&1
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+	# flush routes before each run, otherwise successive runs can
+	# initially present an old TFO cookie
+	ip netns exec "${NETNS}" ip tcp_metrics flush
+	ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1"
+	val=$(ip netns exec "${NETNS}" nstat -az | \
+		grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}')
+	if [ $val -ne 0 ]; then
+		echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero"
+		return 1
+	fi
+}
+
+do_test "-4"
+do_test "-6"
+do_test "-4"
+do_test "-6"
+do_test "-4s"
+do_test "-6s"
+do_test "-4s"
+do_test "-6s"
+do_test "-4r"
+do_test "-6r"
+do_test "-4r"
+do_test "-6r"
+do_test "-4sr"
+do_test "-6sr"
+do_test "-4sr"
+do_test "-6sr"
+echo "all tests done"
-- 
cgit v1.2.3-59-g8ed1b


From cd5385029f1d2e6879b78fff1a7b15514004af17 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Fri, 31 May 2019 18:47:14 +0100
Subject: selftests/bpf: measure RTT from xdp using xdping

xdping allows us to get latency estimates from XDP.  Output looks
like this:

./xdping -I eth4 192.168.55.8
Setting up XDP for eth4, please wait...
XDP setup disrupts network connectivity, hit Ctrl+C to quit

Normal ping RTT data
[Ignore final RTT; it is distorted by XDP using the reply]
PING 192.168.55.8 (192.168.55.8) from 192.168.55.7 eth4: 56(84) bytes of data.
64 bytes from 192.168.55.8: icmp_seq=1 ttl=64 time=0.302 ms
64 bytes from 192.168.55.8: icmp_seq=2 ttl=64 time=0.208 ms
64 bytes from 192.168.55.8: icmp_seq=3 ttl=64 time=0.163 ms
64 bytes from 192.168.55.8: icmp_seq=8 ttl=64 time=0.275 ms

4 packets transmitted, 4 received, 0% packet loss, time 3079ms
rtt min/avg/max/mdev = 0.163/0.237/0.302/0.054 ms

XDP RTT data:
64 bytes from 192.168.55.8: icmp_seq=5 ttl=64 time=0.02808 ms
64 bytes from 192.168.55.8: icmp_seq=6 ttl=64 time=0.02804 ms
64 bytes from 192.168.55.8: icmp_seq=7 ttl=64 time=0.02815 ms
64 bytes from 192.168.55.8: icmp_seq=8 ttl=64 time=0.02805 ms

The xdping program loads the associated xdping_kern.o BPF program
and attaches it to the specified interface.  If run in client
mode (the default), it will add a map entry keyed by the
target IP address; this map will store RTT measurements, current
sequence number etc.  Finally in client mode the ping command
is executed, and the xdping BPF program will use the last ICMP
reply, reformulate it as an ICMP request with the next sequence
number and XDP_TX it.  After the reply to that request is received
we can measure RTT and repeat until the desired number of
measurements is made.  This is why the sequence numbers in the
normal ping are 1, 2, 3 and 8.  We XDP_TX a modified version
of ICMP reply 4 and keep doing this until we get the 4 replies
we need; hence the networking stack only sees reply 8, where
we have XDP_PASSed it upstream since we are done.

In server mode (-s), xdping simply takes ICMP requests and replies
to them in XDP rather than passing the request up to the networking
stack.  No map entry is required.

xdping can be run in native XDP mode (the default, or specified
via -N) or in skb mode (-S).

A test program test_xdping.sh exercises some of these options.

Note that native XDP does not seem to XDP_TX for veths, hence -N
is not tested.  Looking at the code, it looks like XDP_TX is
supported so I'm not sure if that's expected.  Running xdping in
native mode for ixgbe as both client and server works fine.

Changes since v4

- close fds on cleanup (Song Liu)

Changes since v3

- fixed seq to be __be16 (Song Liu)
- fixed fd checks in xdping.c (Song Liu)

Changes since v2

- updated commit message to explain why seq number of last
  ICMP reply is 8 not 4 (Song Liu)
- updated types of seq number, raddr and eliminated csum variable
  in xdpclient/xdpserver functions as it was not needed (Song Liu)
- added XDPING_DEFAULT_COUNT definition and usage specification of
  default/max counts (Song Liu)

Changes since v1
 - moved from RFC to PATCH
 - removed unused variable in ipv4_csum() (Song Liu)
 - refactored ICMP checks into icmp_check() function called by client
   and server programs and reworked client and server programs due
   to lack of shared code (Song Liu)
 - added checks to ensure that SKB and native mode are not requested
   together (Song Liu)

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore          |   1 +
 tools/testing/selftests/bpf/Makefile            |   5 +-
 tools/testing/selftests/bpf/progs/xdping_kern.c | 184 +++++++++++++++++
 tools/testing/selftests/bpf/test_xdping.sh      |  99 +++++++++
 tools/testing/selftests/bpf/xdping.c            | 258 ++++++++++++++++++++++++
 tools/testing/selftests/bpf/xdping.h            |  13 ++
 6 files changed, 558 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/xdping_kern.c
 create mode 100755 tools/testing/selftests/bpf/test_xdping.sh
 create mode 100644 tools/testing/selftests/bpf/xdping.c
 create mode 100644 tools/testing/selftests/bpf/xdping.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index b2a9902f11c5..7470327edcfe 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -38,3 +38,4 @@ libbpf.pc
 libbpf.so.*
 test_hashmap
 test_btf_dump
+xdping
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 9b21391c4966..2b426ae1cdc9 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
 	test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-	test_btf_dump test_cgroup_attach
+	test_btf_dump test_cgroup_attach xdping
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -57,7 +57,8 @@ TEST_PROGS := test_kmod.sh \
 	test_lwt_ip_encap.sh \
 	test_tcp_check_syncookie.sh \
 	test_tc_tunnel.sh \
-	test_tc_edt.sh
+	test_tc_edt.sh \
+	test_xdping.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
new file mode 100644
index 000000000000..87393e7c667c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
+
+#define KBUILD_MODNAME "foo"
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#include "xdping.h"
+
+struct bpf_map_def SEC("maps") ping_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct pinginfo),
+	.max_entries = 256,
+};
+
+static __always_inline void swap_src_dst_mac(void *data)
+{
+	unsigned short *p = data;
+	unsigned short dst[3];
+
+	dst[0] = p[0];
+	dst[1] = p[1];
+	dst[2] = p[2];
+	p[0] = p[3];
+	p[1] = p[4];
+	p[2] = p[5];
+	p[3] = dst[0];
+	p[4] = dst[1];
+	p[5] = dst[2];
+}
+
+static __always_inline __u16 csum_fold_helper(__wsum sum)
+{
+	sum = (sum & 0xffff) + (sum >> 16);
+	return ~((sum & 0xffff) + (sum >> 16));
+}
+
+static __always_inline __u16 ipv4_csum(void *data_start, int data_size)
+{
+	__wsum sum;
+
+	sum = bpf_csum_diff(0, 0, data_start, data_size, 0);
+	return csum_fold_helper(sum);
+}
+
+#define ICMP_ECHO_LEN		64
+
+static __always_inline int icmp_check(struct xdp_md *ctx, int type)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	struct icmphdr *icmph;
+	struct iphdr *iph;
+
+	if (data + sizeof(*eth) + sizeof(*iph) + ICMP_ECHO_LEN > data_end)
+		return XDP_PASS;
+
+	if (eth->h_proto != bpf_htons(ETH_P_IP))
+		return XDP_PASS;
+
+	iph = data + sizeof(*eth);
+
+	if (iph->protocol != IPPROTO_ICMP)
+		return XDP_PASS;
+
+	if (bpf_ntohs(iph->tot_len) - sizeof(*iph) != ICMP_ECHO_LEN)
+		return XDP_PASS;
+
+	icmph = data + sizeof(*eth) + sizeof(*iph);
+
+	if (icmph->type != type)
+		return XDP_PASS;
+
+	return XDP_TX;
+}
+
+SEC("xdpclient")
+int xdping_client(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct pinginfo *pinginfo = NULL;
+	struct ethhdr *eth = data;
+	struct icmphdr *icmph;
+	struct iphdr *iph;
+	__u64 recvtime;
+	__be32 raddr;
+	__be16 seq;
+	int ret;
+	__u8 i;
+
+	ret = icmp_check(ctx, ICMP_ECHOREPLY);
+
+	if (ret != XDP_TX)
+		return ret;
+
+	iph = data + sizeof(*eth);
+	icmph = data + sizeof(*eth) + sizeof(*iph);
+	raddr = iph->saddr;
+
+	/* Record time reply received. */
+	recvtime = bpf_ktime_get_ns();
+	pinginfo = bpf_map_lookup_elem(&ping_map, &raddr);
+	if (!pinginfo || pinginfo->seq != icmph->un.echo.sequence)
+		return XDP_PASS;
+
+	if (pinginfo->start) {
+#pragma clang loop unroll(full)
+		for (i = 0; i < XDPING_MAX_COUNT; i++) {
+			if (pinginfo->times[i] == 0)
+				break;
+		}
+		/* verifier is fussy here... */
+		if (i < XDPING_MAX_COUNT) {
+			pinginfo->times[i] = recvtime -
+					     pinginfo->start;
+			pinginfo->start = 0;
+			i++;
+		}
+		/* No more space for values? */
+		if (i == pinginfo->count || i == XDPING_MAX_COUNT)
+			return XDP_PASS;
+	}
+
+	/* Now convert reply back into echo request. */
+	swap_src_dst_mac(data);
+	iph->saddr = iph->daddr;
+	iph->daddr = raddr;
+	icmph->type = ICMP_ECHO;
+	seq = bpf_htons(bpf_ntohs(icmph->un.echo.sequence) + 1);
+	icmph->un.echo.sequence = seq;
+	icmph->checksum = 0;
+	icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN);
+
+	pinginfo->seq = seq;
+	pinginfo->start = bpf_ktime_get_ns();
+
+	return XDP_TX;
+}
+
+SEC("xdpserver")
+int xdping_server(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	struct icmphdr *icmph;
+	struct iphdr *iph;
+	__be32 raddr;
+	int ret;
+
+	ret = icmp_check(ctx, ICMP_ECHO);
+
+	if (ret != XDP_TX)
+		return ret;
+
+	iph = data + sizeof(*eth);
+	icmph = data + sizeof(*eth) + sizeof(*iph);
+	raddr = iph->saddr;
+
+	/* Now convert request into echo reply. */
+	swap_src_dst_mac(data);
+	iph->saddr = iph->daddr;
+	iph->daddr = raddr;
+	icmph->type = ICMP_ECHOREPLY;
+	icmph->checksum = 0;
+	icmph->checksum = ipv4_csum(icmph, ICMP_ECHO_LEN);
+
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh
new file mode 100755
index 000000000000..c2f0ddb45531
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdping.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# xdping tests
+#   Here we setup and teardown configuration required to run
+#   xdping, exercising its options.
+#
+#   Setup is similar to test_tunnel tests but without the tunnel.
+#
+# Topology:
+# ---------
+#     root namespace   |     tc_ns0 namespace
+#                      |
+#      ----------      |     ----------
+#      |  veth1  | --------- |  veth0  |
+#      ----------    peer    ----------
+#
+# Device Configuration
+# --------------------
+# Root namespace with BPF
+# Device names and addresses:
+#	veth1 IP: 10.1.1.200
+#	xdp added to veth1, xdpings originate from here.
+#
+# Namespace tc_ns0 with BPF
+# Device names and addresses:
+#       veth0 IPv4: 10.1.1.100
+#	For some tests xdping run in server mode here.
+#
+
+readonly TARGET_IP="10.1.1.100"
+readonly TARGET_NS="xdp_ns0"
+
+readonly LOCAL_IP="10.1.1.200"
+
+setup()
+{
+	ip netns add $TARGET_NS
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 netns $TARGET_NS
+	ip netns exec $TARGET_NS ip addr add ${TARGET_IP}/24 dev veth0
+	ip addr add ${LOCAL_IP}/24 dev veth1
+	ip netns exec $TARGET_NS ip link set veth0 up
+	ip link set veth1 up
+}
+
+cleanup()
+{
+	set +e
+	ip netns delete $TARGET_NS 2>/dev/null
+	ip link del veth1 2>/dev/null
+	if [[ $server_pid -ne 0 ]]; then
+		kill -TERM $server_pid
+	fi
+}
+
+test()
+{
+	client_args="$1"
+	server_args="$2"
+
+	echo "Test client args '$client_args'; server args '$server_args'"
+
+	server_pid=0
+	if [[ -n "$server_args" ]]; then
+		ip netns exec $TARGET_NS ./xdping $server_args &
+		server_pid=$!
+		sleep 10
+	fi
+	./xdping $client_args $TARGET_IP
+
+	if [[ $server_pid -ne 0 ]]; then
+		kill -TERM $server_pid
+		server_pid=0
+	fi
+
+	echo "Test client args '$client_args'; server args '$server_args': PASS"
+}
+
+set -e
+
+server_pid=0
+
+trap cleanup EXIT
+
+setup
+
+for server_args in "" "-I veth0 -s -S" ; do
+	# client in skb mode
+	client_args="-I veth1 -S"
+	test "$client_args" "$server_args"
+
+	# client with count of 10 RTT measurements.
+	client_args="-I veth1 -S -c 10"
+	test "$client_args" "$server_args"
+done
+
+echo "OK. All tests passed"
+exit 0
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
new file mode 100644
index 000000000000..d60a343b1371
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
+
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+
+#include "bpf/bpf.h"
+#include "bpf/libbpf.h"
+
+#include "xdping.h"
+
+static int ifindex;
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+
+static void cleanup(int sig)
+{
+	bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+	if (sig)
+		exit(1);
+}
+
+static int get_stats(int fd, __u16 count, __u32 raddr)
+{
+	struct pinginfo pinginfo = { 0 };
+	char inaddrbuf[INET_ADDRSTRLEN];
+	struct in_addr inaddr;
+	__u16 i;
+
+	inaddr.s_addr = raddr;
+
+	printf("\nXDP RTT data:\n");
+
+	if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) {
+		perror("bpf_map_lookup elem: ");
+		return 1;
+	}
+
+	for (i = 0; i < count; i++) {
+		if (pinginfo.times[i] == 0)
+			break;
+
+		printf("64 bytes from %s: icmp_seq=%d ttl=64 time=%#.5f ms\n",
+		       inet_ntop(AF_INET, &inaddr, inaddrbuf,
+				 sizeof(inaddrbuf)),
+		       count + i + 1,
+		       (double)pinginfo.times[i]/1000000);
+	}
+
+	if (i < count) {
+		fprintf(stderr, "Expected %d samples, got %d.\n", count, i);
+		return 1;
+	}
+
+	bpf_map_delete_elem(fd, &raddr);
+
+	return 0;
+}
+
+static void show_usage(const char *prog)
+{
+	fprintf(stderr,
+		"usage: %s [OPTS] -I interface destination\n\n"
+		"OPTS:\n"
+		"    -c count		Stop after sending count requests\n"
+		"			(default %d, max %d)\n"
+		"    -I interface	interface name\n"
+		"    -N			Run in driver mode\n"
+		"    -s			Server mode\n"
+		"    -S			Run in skb mode\n",
+		prog, XDPING_DEFAULT_COUNT, XDPING_MAX_COUNT);
+}
+
+int main(int argc, char **argv)
+{
+	__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
+	struct addrinfo *a, hints = { .ai_family = AF_INET };
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	__u16 count = XDPING_DEFAULT_COUNT;
+	struct pinginfo pinginfo = { 0 };
+	const char *optstr = "c:I:NsS";
+	struct bpf_program *main_prog;
+	int prog_fd = -1, map_fd = -1;
+	struct sockaddr_in rin;
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	char *ifname = NULL;
+	char filename[256];
+	int opt, ret = 1;
+	__u32 raddr = 0;
+	int server = 0;
+	char cmd[256];
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'c':
+			count = atoi(optarg);
+			if (count < 1 || count > XDPING_MAX_COUNT) {
+				fprintf(stderr,
+					"min count is 1, max count is %d\n",
+					XDPING_MAX_COUNT);
+				return 1;
+			}
+			break;
+		case 'I':
+			ifname = optarg;
+			ifindex = if_nametoindex(ifname);
+			if (!ifindex) {
+				fprintf(stderr, "Could not get interface %s\n",
+					ifname);
+				return 1;
+			}
+			break;
+		case 'N':
+			xdp_flags |= XDP_FLAGS_DRV_MODE;
+			break;
+		case 's':
+			/* use server program */
+			server = 1;
+			break;
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		default:
+			show_usage(basename(argv[0]));
+			return 1;
+		}
+	}
+
+	if (!ifname) {
+		show_usage(basename(argv[0]));
+		return 1;
+	}
+	if (!server && optind == argc) {
+		show_usage(basename(argv[0]));
+		return 1;
+	}
+
+	if ((xdp_flags & mode_flags) == mode_flags) {
+		fprintf(stderr, "-N or -S can be specified, not both.\n");
+		show_usage(basename(argv[0]));
+		return 1;
+	}
+
+	if (!server) {
+		/* Only supports IPv4; see hints initiailization above. */
+		if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
+			fprintf(stderr, "Could not resolve %s\n", argv[optind]);
+			return 1;
+		}
+		memcpy(&rin, a->ai_addr, sizeof(rin));
+		raddr = rin.sin_addr.s_addr;
+		freeaddrinfo(a);
+	}
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
+		fprintf(stderr, "load of %s failed\n", filename);
+		return 1;
+	}
+
+	main_prog = bpf_object__find_program_by_title(obj,
+						      server ? "xdpserver" :
+							       "xdpclient");
+	if (main_prog)
+		prog_fd = bpf_program__fd(main_prog);
+	if (!main_prog || prog_fd < 0) {
+		fprintf(stderr, "could not find xdping program");
+		return 1;
+	}
+
+	map = bpf_map__next(NULL, obj);
+	if (map)
+		map_fd = bpf_map__fd(map);
+	if (!map || map_fd < 0) {
+		fprintf(stderr, "Could not find ping map");
+		goto done;
+	}
+
+	signal(SIGINT, cleanup);
+	signal(SIGTERM, cleanup);
+
+	printf("Setting up XDP for %s, please wait...\n", ifname);
+
+	printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
+
+	if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+		fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
+		goto done;
+	}
+
+	if (server) {
+		close(prog_fd);
+		close(map_fd);
+		printf("Running server on %s; press Ctrl+C to exit...\n",
+		       ifname);
+		do { } while (1);
+	}
+
+	/* Start xdping-ing from last regular ping reply, e.g. for a count
+	 * of 10 ICMP requests, we start xdping-ing using reply with seq number
+	 * 10.  The reason the last "real" ping RTT is much higher is that
+	 * the ping program sees the ICMP reply associated with the last
+	 * XDP-generated packet, so ping doesn't get a reply until XDP is done.
+	 */
+	pinginfo.seq = htons(count);
+	pinginfo.count = count;
+
+	if (bpf_map_update_elem(map_fd, &raddr, &pinginfo, BPF_ANY)) {
+		fprintf(stderr, "could not communicate with BPF map: %s\n",
+			strerror(errno));
+		cleanup(0);
+		goto done;
+	}
+
+	/* We need to wait for XDP setup to complete. */
+	sleep(10);
+
+	snprintf(cmd, sizeof(cmd), "ping -c %d -I %s %s",
+		 count, ifname, argv[optind]);
+
+	printf("\nNormal ping RTT data\n");
+	printf("[Ignore final RTT; it is distorted by XDP using the reply]\n");
+
+	ret = system(cmd);
+
+	if (!ret)
+		ret = get_stats(map_fd, count, raddr);
+
+	cleanup(0);
+
+done:
+	if (prog_fd > 0)
+		close(prog_fd);
+	if (map_fd > 0)
+		close(map_fd);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/xdping.h b/tools/testing/selftests/bpf/xdping.h
new file mode 100644
index 000000000000..afc578df77be
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdping.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. */
+
+#define	XDPING_MAX_COUNT	10
+#define	XDPING_DEFAULT_COUNT	4
+
+struct pinginfo {
+	__u64	start;
+	__be16	seq;
+	__u16	count;
+	__u32	pad;
+	__u64	times[XDPING_MAX_COUNT];
+};
-- 
cgit v1.2.3-59-g8ed1b


From 6345266a9989287336ff6a2843cd3b033cda85a1 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 30 May 2019 12:06:36 -0700
Subject: selftests: Add test cases for nexthop objects

Add functional test cases for nexthop objects.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 1026 +++++++++++++++++++++++++++
 1 file changed, 1026 insertions(+)
 create mode 100755 tools/testing/selftests/net/fib_nexthops.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
new file mode 100755
index 000000000000..c5c93d5fb3ad
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -0,0 +1,1026 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: me               | ns: peer              | ns: remote
+#   2001:db8:91::1     |       2001:db8:91::2  |
+#   172.16.1.1         |       172.16.1.2      |
+#            veth1 <---|---> veth2             |
+#                      |              veth5 <--|--> veth6  172.16.101.1
+#            veth3 <---|---> veth4             |           2001:db8:101::1
+#   172.16.2.1         |       172.16.2.2      |
+#   2001:db8:92::1     |       2001:db8:92::2  |
+#
+# This test is for checking IPv4 and IPv6 FIB behavior with nexthop
+# objects. Device reference counts and network namespace cleanup tested
+# by use of network namespace for peer.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime"
+
+ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+TESTS="${ALL_TESTS}"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+nsid=100
+
+################################################################################
+# utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "$VERBOSE" = "1" ]; then
+			echo "    rc=$rc, expected $expected"
+		fi
+
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local ns
+	local addr
+
+	[ -n "$2" ] && ns="-netns $2"
+	addr=$(ip $ns -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+create_ns()
+{
+	local n=${1}
+
+	ip netns del ${n} 2>/dev/null
+
+	set -e
+	ip netns add ${n}
+	ip netns set ${n} $((nsid++))
+	ip -netns ${n} addr add 127.0.0.1/8 dev lo
+	ip -netns ${n} link set lo up
+
+	ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1
+	ip netns exec ${n} sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.default.forwarding=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.all.accept_dad=0
+	ip netns exec ${n} sysctl -qw net.ipv6.conf.default.accept_dad=0
+
+	set +e
+}
+
+setup()
+{
+	cleanup
+
+	create_ns me
+	create_ns peer
+	create_ns remote
+
+	IP="ip -netns me"
+	set -e
+	$IP li add veth1 type veth peer name veth2
+	$IP li set veth1 up
+	$IP addr add 172.16.1.1/24 dev veth1
+	$IP -6 addr add 2001:db8:91::1/64 dev veth1
+
+	$IP li add veth3 type veth peer name veth4
+	$IP li set veth3 up
+	$IP addr add 172.16.2.1/24 dev veth3
+	$IP -6 addr add 2001:db8:92::1/64 dev veth3
+
+	$IP li set veth2 netns peer up
+	ip -netns peer addr add 172.16.1.2/24 dev veth2
+	ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2
+
+	$IP li set veth4 netns peer up
+	ip -netns peer addr add 172.16.2.2/24 dev veth4
+	ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4
+
+	ip -netns remote li add veth5 type veth peer name veth6
+	ip -netns remote li set veth5 up
+	ip -netns remote addr add dev veth5 172.16.101.1/24
+	ip -netns remote addr add dev veth5 2001:db8:101::1/64
+	ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
+	ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
+
+	ip -netns remote li set veth6 netns peer up
+	ip -netns peer addr add dev veth6 172.16.101.2/24
+	ip -netns peer addr add dev veth6 2001:db8:101::2/64
+	set +e
+}
+
+cleanup()
+{
+	local ns
+
+	for ns in me peer remote; do
+		ip netns del ${ns} 2>/dev/null
+	done
+}
+
+check_output()
+{
+	local out="$1"
+	local expected="$2"
+	local rc=0
+
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+check_nexthop()
+{
+	local nharg="$1"
+	local expected="$2"
+	local out
+
+	out=$($IP nexthop ls ${nharg} 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
+check_route()
+{
+	local pfx="$1"
+	local expected="$2"
+	local out
+
+	out=$($IP route ls match ${pfx} 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
+check_route6()
+{
+	local pfx="$1"
+	local expected="$2"
+	local out
+
+	out=$($IP -6 route ls match ${pfx} 2>/dev/null)
+
+	check_output "${out}" "${expected}"
+}
+
+################################################################################
+# basic operations (add, delete, replace) on nexthops and nexthop groups
+#
+# IPv6
+
+ipv6_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6"
+	echo "----------------------"
+
+	run_cmd "$IP nexthop add id 52 via 2001:db8:91::2 dev veth1"
+	rc=$?
+	log_test $rc 0 "Create nexthop with id, gw, dev"
+	if [ $rc -ne 0 ]; then
+		echo "Basic IPv6 create fails; can not continue"
+		return 1
+	fi
+
+	run_cmd "$IP nexthop get id 52"
+	log_test $? 0 "Get nexthop by id"
+	check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1"
+
+	run_cmd "$IP nexthop del id 52"
+	log_test $? 0 "Delete nexthop by id"
+	check_nexthop "id 52" ""
+
+	#
+	# gw, device spec
+	#
+	# gw validation, no device - fails since dev required
+	run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
+	log_test $? 2 "Create nexthop - gw only"
+
+	# gw is not reachable throught given dev
+	run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
+	log_test $? 2 "Create nexthop - invalid gw+dev combination"
+
+	# onlink arg overrides gw+dev lookup
+	run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1 onlink"
+	log_test $? 0 "Create nexthop - gw+dev and onlink"
+
+	# admin down should delete nexthops
+	set -e
+	run_cmd "$IP -6 nexthop add id 55 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 56 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 57 via 2001:db8:91::5 dev veth1"
+	run_cmd "$IP li set dev veth1 down"
+	set +e
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops removed on admin down"
+}
+
+ipv6_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6 groups functional"
+	echo "----------------------"
+
+	# basic functionality: create a nexthop group, default weight
+	run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 61"
+	log_test $? 0 "Create nexthop group with single nexthop"
+
+	# get nexthop group
+	run_cmd "$IP nexthop get id 101"
+	log_test $? 0 "Get nexthop group by id"
+	check_nexthop "id 101" "id 101 group 61"
+
+	# delete nexthop group
+	run_cmd "$IP nexthop del id 101"
+	log_test $? 0 "Delete nexthop group by id"
+	check_nexthop "id 101" ""
+
+	$IP nexthop flush >/dev/null 2>&1
+	check_nexthop "id 101" ""
+
+	#
+	# create group with multiple nexthops - mix of gw and dev only
+	#
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 65 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 62/63/64/65"
+	log_test $? 0 "Nexthop group with multiple nexthops"
+	check_nexthop "id 102" "id 102 group 62/63/64/65"
+
+	# Delete nexthop in a group and group is updated
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 102" "id 102 group 62/64/65"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+
+	# create group with multiple weighted nexthops
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 103 group 62/63,2/64,3/65,4"
+	log_test $? 0 "Nexthop group with weighted nexthops"
+	check_nexthop "id 103" "id 103 group 62/63,2/64,3/65,4"
+
+	# Delete nexthop in a weighted group and group is updated
+	run_cmd "$IP nexthop del id 63"
+	check_nexthop "id 103" "id 103 group 62/64,3/65,4"
+	log_test $? 0 "Weighted nexthop group updated when entry is deleted"
+
+	# admin down - nexthop is removed from group
+	run_cmd "$IP li set dev veth1 down"
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops in groups removed on admin down"
+
+	# expect groups to have been deleted as well
+	check_nexthop "" ""
+
+	run_cmd "$IP li set dev veth1 up"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	# group with nexthops using different devices
+	set -e
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+
+	run_cmd "$IP nexthop add id 72 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 73 via 2001:db8:92::3 dev veth3"
+	run_cmd "$IP nexthop add id 74 via 2001:db8:92::4 dev veth3"
+	run_cmd "$IP nexthop add id 75 via 2001:db8:92::5 dev veth3"
+	set +e
+
+	# multiple groups with same nexthop
+	run_cmd "$IP nexthop add id 104 group 62"
+	run_cmd "$IP nexthop add id 105 group 62"
+	check_nexthop "group" "id 104 group 62 id 105 group 62"
+	log_test $? 0 "Multiple groups with same nexthop"
+
+	run_cmd "$IP nexthop flush groups"
+	[ $? -ne 0 ] && return 1
+
+	# on admin down of veth1, it should be removed from the group
+	run_cmd "$IP nexthop add id 105 group 62/63/72/73/64"
+	run_cmd "$IP li set veth1 down"
+	check_nexthop "id 105" "id 105 group 72/73"
+	log_test $? 0 "Nexthops in group removed on admin down - mixed group"
+
+	run_cmd "$IP nexthop add id 106 group 105/74"
+	log_test $? 2 "Nexthop group can not have a group as an entry"
+
+	# a group can have a blackhole entry only if it is the only
+	# nexthop in the group. Needed for atomic replace with an
+	# actual nexthop group
+	run_cmd "$IP -6 nexthop add id 31 blackhole"
+	run_cmd "$IP nexthop add id 107 group 31"
+	log_test $? 0 "Nexthop group with a blackhole entry"
+
+	run_cmd "$IP nexthop add id 108 group 31/24"
+	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+}
+
+ipv6_fcnal_runtime()
+{
+	local rc
+
+	echo
+	echo "IPv6 functional runtime"
+	echo "-----------------------"
+
+	sleep 5
+
+	#
+	# IPv6 - the basics
+	#
+	run_cmd "$IP nexthop add id 81 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+	log_test $? 0 "Route add"
+
+	run_cmd "$IP ro delete 2001:db8:101::1/128 nhid 81"
+	log_test $? 0 "Route delete"
+
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 0 "Ping with nexthop"
+
+	run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 122 group 81/82"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 0 "Ping - multipath"
+
+	#
+	# IPv6 with blackhole nexthops
+	#
+	run_cmd "$IP -6 nexthop add id 83 blackhole"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 2 "Ping - blackhole"
+
+	run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 0 "Ping - blackhole replaced with gateway"
+
+	run_cmd "$IP -6 nexthop replace id 83 blackhole"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	log_test $? 2 "Ping - gateway replaced by blackhole"
+
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	if [ $? -eq 0 ]; then
+		run_cmd "$IP nexthop replace id 122 group 83"
+		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		log_test $? 2 "Ping - group with blackhole"
+
+		run_cmd "$IP nexthop replace id 122 group 81/82"
+		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		log_test $? 0 "Ping - group blackhole replaced with gateways"
+	else
+		log_test 2 0 "Ping - multipath failed"
+	fi
+
+	#
+	# device only and gw + dev only mix
+	#
+	run_cmd "$IP -6 nexthop add id 85 dev veth1"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
+	log_test $? 0 "IPv6 route with device only nexthop"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1"
+
+	run_cmd "$IP nexthop add id 123 group 81/85"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
+	log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 nexthop via 2001:db8:91::2 dev veth1 nexthop dev veth1"
+
+	#
+	# IPv6 route with v4 nexthop - not allowed
+	#
+	run_cmd "$IP ro delete 2001:db8:101::1/128"
+	run_cmd "$IP nexthop add id 84 via 172.16.1.1 dev veth1"
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 84"
+	log_test $? 2 "IPv6 route can not have a v4 gateway"
+
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 81"
+	run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
+	log_test $? 2 "Nexthop replace - v6 route, v4 nexthop"
+
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
+	run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
+	log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# weird IPv6 cases
+	#
+	run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+
+	# TO-DO:
+	# existing route with old nexthop; append route with new nexthop
+	# existing route with old nexthop; replace route with new
+	# existing route with new nexthop; replace route with old
+	# route with src address and using nexthop - not allowed
+}
+
+ipv4_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 functional"
+	echo "----------------------"
+
+	#
+	# basic IPv4 ops - add, get, delete
+	#
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	rc=$?
+	log_test $rc 0 "Create nexthop with id, gw, dev"
+	if [ $rc -ne 0 ]; then
+		echo "Basic IPv4 create fails; can not continue"
+		return 1
+	fi
+
+	run_cmd "$IP nexthop get id 12"
+	log_test $? 0 "Get nexthop by id"
+	check_nexthop "id 12" "id 12 via 172.16.1.2 src 172.16.1.1 dev veth1 scope link"
+
+	run_cmd "$IP nexthop del id 12"
+	log_test $? 0 "Delete nexthop by id"
+	check_nexthop "id 52" ""
+
+	#
+	# gw, device spec
+	#
+	# gw validation, no device - fails since dev is required
+	run_cmd "$IP nexthop add id 12 via 172.16.2.3"
+	log_test $? 2 "Create nexthop - gw only"
+
+	# gw not reachable through given dev
+	run_cmd "$IP nexthop add id 13 via 172.16.3.2 dev veth1"
+	log_test $? 2 "Create nexthop - invalid gw+dev combination"
+
+	# onlink flag overrides gw+dev lookup
+	run_cmd "$IP nexthop add id 13 via 172.16.3.2 dev veth1 onlink"
+	log_test $? 0 "Create nexthop - gw+dev and onlink"
+
+	# admin down should delete nexthops
+	set -e
+	run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 16 via 172.16.1.4 dev veth1"
+	run_cmd "$IP nexthop add id 17 via 172.16.1.5 dev veth1"
+	run_cmd "$IP li set dev veth1 down"
+	set +e
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops removed on admin down"
+}
+
+ipv4_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 groups functional"
+	echo "----------------------"
+
+	# basic functionality: create a nexthop group, default weight
+	run_cmd "$IP nexthop add id 11 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 11"
+	log_test $? 0 "Create nexthop group with single nexthop"
+
+	# get nexthop group
+	run_cmd "$IP nexthop get id 101"
+	log_test $? 0 "Get nexthop group by id"
+	check_nexthop "id 101" "id 101 group 11"
+
+	# delete nexthop group
+	run_cmd "$IP nexthop del id 101"
+	log_test $? 0 "Delete nexthop group by id"
+	check_nexthop "id 101" ""
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	#
+	# create group with multiple nexthops
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 14 via 172.16.1.4 dev veth1"
+	run_cmd "$IP nexthop add id 15 via 172.16.1.5 dev veth1"
+	run_cmd "$IP nexthop add id 102 group 12/13/14/15"
+	log_test $? 0 "Nexthop group with multiple nexthops"
+	check_nexthop "id 102" "id 102 group 12/13/14/15"
+
+	# Delete nexthop in a group and group is updated
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 102" "id 102 group 12/14/15"
+	log_test $? 0 "Nexthop group updated when entry is deleted"
+
+	# create group with multiple weighted nexthops
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 103 group 12/13,2/14,3/15,4"
+	log_test $? 0 "Nexthop group with weighted nexthops"
+	check_nexthop "id 103" "id 103 group 12/13,2/14,3/15,4"
+
+	# Delete nexthop in a weighted group and group is updated
+	run_cmd "$IP nexthop del id 13"
+	check_nexthop "id 103" "id 103 group 12/14,3/15,4"
+	log_test $? 0 "Weighted nexthop group updated when entry is deleted"
+
+	# admin down - nexthop is removed from group
+	run_cmd "$IP li set dev veth1 down"
+	check_nexthop "dev veth1" ""
+	log_test $? 0 "Nexthops in groups removed on admin down"
+
+	# expect groups to have been deleted as well
+	check_nexthop "" ""
+
+	run_cmd "$IP li set dev veth1 up"
+
+	$IP nexthop flush >/dev/null 2>&1
+
+	# group with nexthops using different devices
+	set -e
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+	run_cmd "$IP nexthop add id 14 via 172.16.1.4 dev veth1"
+	run_cmd "$IP nexthop add id 15 via 172.16.1.5 dev veth1"
+
+	run_cmd "$IP nexthop add id 22 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 23 via 172.16.2.3 dev veth3"
+	run_cmd "$IP nexthop add id 24 via 172.16.2.4 dev veth3"
+	run_cmd "$IP nexthop add id 25 via 172.16.2.5 dev veth3"
+	set +e
+
+	# multiple groups with same nexthop
+	run_cmd "$IP nexthop add id 104 group 12"
+	run_cmd "$IP nexthop add id 105 group 12"
+	check_nexthop "group" "id 104 group 12 id 105 group 12"
+	log_test $? 0 "Multiple groups with same nexthop"
+
+	run_cmd "$IP nexthop flush groups"
+	[ $? -ne 0 ] && return 1
+
+	# on admin down of veth1, it should be removed from the group
+	run_cmd "$IP nexthop add id 105 group 12/13/22/23/14"
+	run_cmd "$IP li set veth1 down"
+	check_nexthop "id 105" "id 105 group 22/23"
+	log_test $? 0 "Nexthops in group removed on admin down - mixed group"
+
+	run_cmd "$IP nexthop add id 106 group 105/24"
+	log_test $? 2 "Nexthop group can not have a group as an entry"
+
+	# a group can have a blackhole entry only if it is the only
+	# nexthop in the group. Needed for atomic replace with an
+	# actual nexthop group
+	run_cmd "$IP nexthop add id 31 blackhole"
+	run_cmd "$IP nexthop add id 107 group 31"
+	log_test $? 0 "Nexthop group with a blackhole entry"
+
+	run_cmd "$IP nexthop add id 108 group 31/24"
+	log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+}
+
+ipv4_withv6_fcnal()
+{
+	local lladdr
+
+	set -e
+	lladdr=$(get_linklocal veth2 peer)
+	run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1"
+	set +e
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 11"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+	check_route "172.16.101.1" "172.16.101.1 nhid 11 via ${lladdr} dev veth1"
+
+	set -e
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 11/12"
+	set +e
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+	log_test $? 0 "IPv4 route with IPv6 gateway"
+	check_route "172.16.101.1" "172.16.101.1 via ${lladdr} dev veth1"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1"
+	log_test $? 2 "IPv4 route with invalid IPv6 gateway"
+}
+
+ipv4_fcnal_runtime()
+{
+	local lladdr
+	local rc
+
+	echo
+	echo "IPv4 functional runtime"
+	echo "-----------------------"
+
+	run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1"
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 21"
+	log_test $? 0 "Route add"
+	check_route "172.16.101.1" "172.16.101.1 nhid 21 via 172.16.1.2 dev veth1"
+
+	run_cmd "$IP ro delete 172.16.101.1/32 nhid 21"
+	log_test $? 0 "Route delete"
+
+	#
+	# scope mismatch
+	#
+	run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1"
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 22 scope host"
+	log_test $? 2 "Route add - scope conflict with nexthop"
+
+	run_cmd "$IP nexthop replace id 22 dev veth3"
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 22 scope host"
+	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
+	log_test $? 2 "Nexthop replace with invalid scope for existing route"
+
+	#
+	# add route with nexthop and check traffic
+	#
+	run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Basic ping"
+
+	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 122 group 21/22"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Ping - multipath"
+
+	#
+	# IPv4 with blackhole nexthops
+	#
+	run_cmd "$IP nexthop add id 23 blackhole"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 2 "Ping - blackhole"
+
+	run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "Ping - blackhole replaced with gateway"
+
+	run_cmd "$IP nexthop replace id 23 blackhole"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 2 "Ping - gateway replaced by blackhole"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	if [ $? -eq 0 ]; then
+		run_cmd "$IP nexthop replace id 122 group 23"
+		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		log_test $? 2 "Ping - group with blackhole"
+
+		run_cmd "$IP nexthop replace id 122 group 21/22"
+		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		log_test $? 0 "Ping - group blackhole replaced with gateways"
+	else
+		log_test 2 0 "Ping - multipath failed"
+	fi
+
+	#
+	# device only and gw + dev only mix
+	#
+	run_cmd "$IP nexthop add id 85 dev veth1"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 85"
+	log_test $? 0 "IPv4 route with device only nexthop"
+	check_route "172.16.101.1" "172.16.101.1 nhid 85 dev veth1"
+
+	run_cmd "$IP nexthop add id 122 group 21/85"
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
+	log_test $? 0 "IPv4 multipath route with nexthop mix - dev only + gw"
+	check_route "172.16.101.1" "172.16.101.1 nhid 85 nexthop via 172.16.1.2 dev veth1 nexthop dev veth1"
+
+	#
+	# IPv4 with IPv6
+	#
+	set -e
+	lladdr=$(get_linklocal veth2 peer)
+	run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
+	set +e
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+	$IP neigh sh | grep -q "${lladdr} dev veth1"
+	if [ $? -eq 1 ]; then
+		echo "    WARNING: Neigh entry missing for ${lladdr}"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	$IP neigh sh | grep -q "172.16.101.1 dev eth1"
+	if [ $? -eq 0 ]; then
+		echo "    WARNING: Neigh entry exists for 172.16.101.1"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	set -e
+	run_cmd "$IP nexthop add id 25 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 group 24/25"
+	set +e
+	run_cmd "$IP ro replace 172.16.101.1/32 nhid 101"
+	log_test $? 0 "IPv4 route with mixed v4-v6 multipath route"
+
+	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv6 nexthop with IPv4 route"
+
+	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv4 route with IPv6 gateway"
+
+	$IP neigh sh | grep -q "${lladdr} dev veth1"
+	if [ $? -eq 1 ]; then
+		echo "    WARNING: Neigh entry missing for ${lladdr}"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	$IP neigh sh | grep -q "172.16.101.1 dev eth1"
+	if [ $? -eq 0 ]; then
+		echo "    WARNING: Neigh entry exists for 172.16.101.1"
+		$IP neigh sh | grep 'dev veth1'
+	fi
+
+	#
+	# MPLS as an example of LWT encap
+	#
+	run_cmd "$IP nexthop add id 51 encap mpls 101 via 172.16.1.2 dev veth1"
+	log_test $? 0 "IPv4 route with MPLS encap"
+	check_nexthop "id 51" "id 51 encap mpls 101 via 172.16.1.2 dev veth1 scope link"
+	log_test $? 0 "IPv4 route with MPLS encap - check"
+
+	run_cmd "$IP nexthop add id 52 encap mpls 102 via inet6 2001:db8:91::2 dev veth1"
+	log_test $? 0 "IPv4 route with MPLS encap and v6 gateway"
+	check_nexthop "id 52" "id 52 encap mpls 102 via 2001:db8:91::2 dev veth1 scope link"
+	log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
+}
+
+basic()
+{
+	echo
+	echo "Basic functional tests"
+	echo "----------------------"
+	run_cmd "$IP nexthop ls"
+	log_test $? 0 "List with nothing defined"
+
+	run_cmd "$IP nexthop get id 1"
+	log_test $? 2 "Nexthop get on non-existent id"
+
+	# attempt to create nh without a device or gw - fails
+	run_cmd "$IP nexthop add id 1"
+	log_test $? 2 "Nexthop with no device or gateway"
+
+	# attempt to create nh with down device - fails
+	$IP li set veth1 down
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	log_test $? 2 "Nexthop with down device"
+
+	# create nh with linkdown device - fails
+	$IP li set veth1 up
+	ip -netns peer li set veth2 down
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	log_test $? 2 "Nexthop with device that is linkdown"
+	ip -netns peer li set veth2 up
+
+	# device only
+	run_cmd "$IP nexthop add id 1 dev veth1"
+	log_test $? 0 "Nexthop with device only"
+
+	# create nh with duplicate id
+	run_cmd "$IP nexthop add id 1 dev veth3"
+	log_test $? 2 "Nexthop with duplicate id"
+
+	# blackhole nexthop
+	run_cmd "$IP nexthop add id 2 blackhole"
+	log_test $? 0 "Blackhole nexthop"
+
+	# blackhole nexthop can not have other specs
+	run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
+	log_test $? 2 "Blackhole nexthop with other attributes"
+
+	#
+	# groups
+	#
+
+	run_cmd "$IP nexthop add id 101 group 1"
+	log_test $? 0 "Create group"
+
+	run_cmd "$IP nexthop add id 102 group 2"
+	log_test $? 0 "Create group with blackhole nexthop"
+
+	# multipath group can not have a blackhole as 1 path
+	run_cmd "$IP nexthop add id 103 group 1/2"
+	log_test $? 2 "Create multipath group where 1 path is a blackhole"
+
+	# multipath group can not have a member replaced by a blackhole
+	run_cmd "$IP nexthop replace id 2 dev veth3"
+	run_cmd "$IP nexthop replace id 102 group 1/2"
+	run_cmd "$IP nexthop replace id 2 blackhole"
+	log_test $? 2 "Multipath group can not have a member replaced by blackhole"
+
+	# attempt to create group with non-existent nexthop
+	run_cmd "$IP nexthop add id 103 group 12"
+	log_test $? 2 "Create group with non-existent nexthop"
+
+	# attempt to create group with same nexthop
+	run_cmd "$IP nexthop add id 103 group 1/1"
+	log_test $? 2 "Create group with same nexthop multiple times"
+
+	# replace nexthop with a group - fails
+	run_cmd "$IP nexthop replace id 2 group 1"
+	log_test $? 2 "Replace nexthop with nexthop group"
+
+	# replace nexthop group with a nexthop - fails
+	run_cmd "$IP nexthop replace id 101 dev veth1"
+	log_test $? 2 "Replace nexthop group with nexthop"
+
+	# nexthop group with other attributes fail
+	run_cmd "$IP nexthop add id 104 group 1 dev veth1"
+	log_test $? 2 "Nexthop group and device"
+
+	run_cmd "$IP nexthop add id 104 group 1 blackhole"
+	log_test $? 2 "Nexthop group and blackhole"
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $ALL_TESTS)
+        -4          IPv4 tests only
+        -6          IPv6 tests only
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+
+    Runtime test
+	-n num	    Number of nexthops to target
+	-N    	    Use new style to install routes in DUT
+
+done
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pP46hv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		4) TESTS=${IPV4_TESTS};;
+		6) TESTS=${IPV6_TESTS};;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip help 2>&1 | grep -q nexthop
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing nexthop command"
+	exit $ksft_skip
+fi
+
+out=$(ip nexthop ls 2>&1 | grep -q "Operation not supported")
+if [ $? -eq 0 ]; then
+	echo "SKIP: kernel lacks nexthop support"
+	exit $ksft_skip
+fi
+
+for t in $TESTS
+do
+	case $t in
+	none) IP="ip -netns peer"; setup; exit 0;;
+	*) setup; $t; cleanup;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
-- 
cgit v1.2.3-59-g8ed1b


From fc4c93f145d7812fa3a93d688492041684993fbb Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Tue, 4 Jun 2019 15:40:44 +0200
Subject: selftests: add basic netdevsim devlink flash testing

Utilizes the devlink flash code.

Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/netdevsim/devlink.sh     | 53 ++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/netdevsim/devlink.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
new file mode 100755
index 000000000000..9d8baf5d14b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="fw_flash_test"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+BUS_ADDR=10
+PORT_COUNT=4
+DEV_NAME=netdevsim$BUS_ADDR
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
+DL_HANDLE=netdevsim/$DEV_NAME
+
+fw_flash_test()
+{
+	RET=0
+
+	devlink dev flash $DL_HANDLE file dummy
+	check_err $? "Failed to flash with status updates on"
+
+	echo "n"> $DEBUGFS_DIR/fw_update_status
+	check_err $? "Failed to disable status updates"
+
+	devlink dev flash $DL_HANDLE file dummy
+	check_err $? "Failed to flash with status updates off"
+
+	log_test "fw flash test"
+}
+
+setup_prepare()
+{
+	modprobe netdevsim
+	echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device
+	while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+}
+
+cleanup()
+{
+	pre_cleanup
+	echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device
+	modprobe -r netdevsim
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-59-g8ed1b


From 243781dba7907d0f61b4f6da50c2f6f9ecc43186 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 8 Jun 2019 14:53:36 -0700
Subject: selftests: pmtu: Move running of test into a new function

Move the block of code that runs a test and prints the verdict to a
new function, run_test.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 63 +++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 4a1275990d7e..3d6b21c4b1db 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1090,6 +1090,41 @@ test_cleanup_ipv4_exception() {
 	test_cleanup_vxlanX_exception 4
 }
 
+run_test() {
+	(
+	tname="$1"
+	tdesc="$2"
+
+	unset IFS
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "\n##########################################################################\n\n"
+	fi
+
+	eval test_${tname}
+	ret=$?
+
+	if [ $ret -eq 0 ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${tdesc}"
+	elif [ $ret -eq 1 ]; then
+		printf "TEST: %-60s  [FAIL]\n" "${tdesc}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "Pausing. Hit enter to continue"
+			read a
+		fi
+		err_flush
+		exit 1
+	elif [ $ret -eq 2 ]; then
+		printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
+		err_flush
+	fi
+
+	return $ret
+	)
+	[ $? -ne 0 ] && exitcode=1
+}
+
 usage() {
 	echo
 	echo "$0 [OPTIONS] [TEST]..."
@@ -1147,33 +1182,7 @@ for t in ${tests}; do
 	done
 	[ $run_this -eq 0 ] && continue
 
-	(
-		unset IFS
-
-		if [ "$VERBOSE" = "1" ]; then
-			printf "\n##########################################################################\n\n"
-		fi
-
-		eval test_${name}
-		ret=$?
-
-		if [ $ret -eq 0 ]; then
-			printf "TEST: %-60s  [ OK ]\n" "${t}"
-		elif [ $ret -eq 1 ]; then
-			printf "TEST: %-60s  [FAIL]\n" "${t}"
-			if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
-				echo
-				echo "Pausing. Hit enter to continue"
-				read a
-			fi
-			err_flush
-			exit 1
-		elif [ $ret -eq 2 ]; then
-			printf "TEST: %-60s  [SKIP]\n" "${t}"
-			err_flush
-		fi
-	)
-	[ $? -ne 0 ] && exitcode=1
+	run_test "${name}" "${t}"
 done
 
 exit ${exitcode}
-- 
cgit v1.2.3-59-g8ed1b


From f4ca0c341ce03e4a170c4d4535dc6f8e86a8b5e3 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 8 Jun 2019 14:53:37 -0700
Subject: selftests: pmtu: Move route installs to a new function

Move the route add commands to a new function called setup_routing_old.
The '_old' refers to the classic way of installing routes.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 3d6b21c4b1db..14ffcf490032 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -448,6 +448,20 @@ setup_xfrm6() {
 	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
 }
 
+setup_routing_old() {
+	for i in ${routes}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
+		[ "${gw}" = "" ]	&& gw="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} route add ${addr} via ${gw}
+
+		ns=""; addr=""; gw=""
+	done
+}
+
 setup_routing() {
 	for i in ${NS_R1} ${NS_R2}; do
 		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
@@ -478,17 +492,7 @@ setup_routing() {
 		ns=""; peer=""; segment=""
 	done
 
-	for i in ${routes}; do
-		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
-		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
-		[ "${gw}" = "" ]	&& gw="${i}"
-
-		ns_name="$(nsname ${ns})"
-
-		ip -n ${ns_name} route add ${addr} via ${gw}
-
-		ns=""; addr=""; gw=""
-	done
+	setup_routing_old
 }
 
 setup() {
-- 
cgit v1.2.3-59-g8ed1b


From 438a9a856ba481a2e56a1acf6edcbe05b6484237 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 8 Jun 2019 14:53:38 -0700
Subject: selftests: pmtu: Add support for routing via nexthop objects

Add routing setup using nexthop objects and repeat tests with
old and new routing.

Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 158 ++++++++++++++++++++++++++++--------
 1 file changed, 126 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 14ffcf490032..9e6d8b704186 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -123,34 +123,35 @@ TRACING=0
 # Some systems don't have a ping6 binary anymore
 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
 
+#               Name                          Description                  re-run with nh
 tests="
-	pmtu_ipv4_exception		ipv4: PMTU exceptions
-	pmtu_ipv6_exception		ipv6: PMTU exceptions
-	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions
-	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions
-	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions
-	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions
-	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions
-	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions
-	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions
-	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions
-	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions
-	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions
-	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions
-	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions
-	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions
-	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions
-	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions
-	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions
-	pmtu_vti6_exception		vti6: PMTU exceptions
-	pmtu_vti4_exception		vti4: PMTU exceptions
-	pmtu_vti4_default_mtu		vti4: default MTU assignment
-	pmtu_vti6_default_mtu		vti6: default MTU assignment
-	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
-	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
-	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes
-	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions
-	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions"
+	pmtu_ipv4_exception		ipv4: PMTU exceptions			1
+	pmtu_ipv6_exception		ipv6: PMTU exceptions			1
+	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
+	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
+	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
+	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions	1
+	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions	1
+	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions	1
+	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions	1
+	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions	1
+	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions		1
+	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions		1
+	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions		1
+	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions		1
+	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions		1
+	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions		1
+	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions		1
+	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions		1
+	pmtu_vti6_exception		vti6: PMTU exceptions			0
+	pmtu_vti4_exception		vti4: PMTU exceptions			0
+	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
+	pmtu_vti6_default_mtu		vti6: default MTU assignment		0
+	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation	0
+	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation	0
+	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
+	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
+	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1"
 
 NS_A="ns-A"
 NS_B="ns-B"
@@ -194,6 +195,30 @@ routes="
 	B	default			${prefix6}:${b_r1}::2
 "
 
+USE_NH="no"
+#	ns	family	nh id	   destination		gateway
+nexthops="
+	A	4	41	${prefix4}.${a_r1}.2	veth_A-R1
+	A	4	42	${prefix4}.${a_r2}.2	veth_A-R2
+	B	4	41	${prefix4}.${b_r1}.2	veth_B-R1
+
+	A	6	61	${prefix6}:${a_r1}::2	veth_A-R1
+	A	6	62	${prefix6}:${a_r2}::2	veth_A-R2
+	B	6	61	${prefix6}:${b_r1}::2	veth_B-R1
+"
+
+# nexthop id correlates to id in nexthops config above
+#	ns    family	prefix			nh id
+routes_nh="
+	A	4	default			41
+	A	4	${prefix4}.${b_r2}.1	42
+	B	4	default			41
+
+	A	6	default			61
+	A	6	${prefix6}:${b_r2}::1	62
+	B	6	default			61
+"
+
 veth4_a_addr="192.168.1.1"
 veth4_b_addr="192.168.1.2"
 veth4_mask="24"
@@ -462,6 +487,36 @@ setup_routing_old() {
 	done
 }
 
+setup_routing_new() {
+	for i in ${nexthops}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
+		[ "${nhid}" = "" ]	&& nhid="${i}"		&& continue
+		[ "${gw}" = "" ]	&& gw="${i}"		&& continue
+		[ "${dev}" = "" ]	&& dev="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} -${fam} nexthop add id ${nhid} via ${gw} dev ${dev}
+
+		ns=""; fam=""; nhid=""; gw=""; dev=""
+
+	done
+
+	for i in ${routes_nh}; do
+		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
+		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
+		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
+		[ "${nhid}" = "" ]	&& nhid="${i}"
+
+		ns_name="$(nsname ${ns})"
+
+		ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
+
+		ns=""; fam=""; addr=""; nhid=""
+	done
+}
+
 setup_routing() {
 	for i in ${NS_R1} ${NS_R2}; do
 		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
@@ -492,7 +547,13 @@ setup_routing() {
 		ns=""; peer=""; segment=""
 	done
 
-	setup_routing_old
+	if [ "$USE_NH" = "yes" ]; then
+		setup_routing_new
+	else
+		setup_routing_old
+	fi
+
+	return 0
 }
 
 setup() {
@@ -1126,7 +1187,19 @@ run_test() {
 
 	return $ret
 	)
-	[ $? -ne 0 ] && exitcode=1
+	ret=$?
+	[ $ret -ne 0 ] && exitcode=1
+
+	return $ret
+}
+
+run_test_nh() {
+	tname="$1"
+	tdesc="$2"
+
+	USE_NH=yes
+	run_test "${tname}" "${tdesc} - nexthop objects"
+	USE_NH=no
 }
 
 usage() {
@@ -1175,8 +1248,20 @@ trap cleanup EXIT
 # start clean
 cleanup
 
+HAVE_NH=no
+ip nexthop ls >/dev/null 2>&1
+[ $? -eq 0 ] && HAVE_NH=yes
+
+name=""
+desc=""
+rerun_nh=0
 for t in ${tests}; do
-	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+	[ "${name}" = "" ]	&& name="${t}"	&& continue
+	[ "${desc}" = "" ]	&& desc="${t}"	&& continue
+
+	if [ "${HAVE_NH}" = "yes" ]; then
+		rerun_nh="${t}"
+	fi
 
 	run_this=1
 	for arg do
@@ -1184,9 +1269,18 @@ for t in ${tests}; do
 		[ "${arg}" = "${name}" ] && run_this=1 && break
 		run_this=0
 	done
-	[ $run_this -eq 0 ] && continue
+	if [ $run_this -eq 1 ]; then
+		run_test "${name}" "${desc}"
+		# if test was skipped no need to retry with nexthop objects
+		[ $? -eq 2 ] && rerun_nh=0
 
-	run_test "${name}" "${t}"
+		if [ "${rerun_nh}" = "1" ]; then
+			run_test_nh "${name}" "${desc}"
+		fi
+	fi
+	name=""
+	desc=""
+	rerun_nh=0
 done
 
 exit ${exitcode}
-- 
cgit v1.2.3-59-g8ed1b


From 622946d9186e132d36a3e92b94bbdf375637d9bd Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 8 Jun 2019 14:53:39 -0700
Subject: selftests: icmp_redirect: Add support for routing via nexthop objects

Add a second pass to icmp_redirect.sh to use nexthop objects for
routes.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/icmp_redirect.sh | 49 ++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 76a7c4472dc3..18c5de53558a 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -331,6 +331,38 @@ run_ping()
 	run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
 }
 
+replace_route_new()
+{
+	# r1 to h2 via r2 and eth0
+	run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
+	run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
+}
+
+reset_route_new()
+{
+	run_cmd ip -netns r1 nexthop flush
+	run_cmd ip -netns h1 nexthop flush
+
+	initial_route_new
+}
+
+initial_route_new()
+{
+	# r1 to h2 via r2 and eth1
+	run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
+	run_cmd ip -netns r1 ro add ${H2_N2} nhid 1
+
+	run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
+	run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2
+
+	# h1 to h2 via r1
+	run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0
+	run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
+
+	run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0
+	run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
+}
+
 replace_route_legacy()
 {
 	# r1 to h2 via r2 and eth0
@@ -479,6 +511,23 @@ WITH_VRF=yes
 setup
 do_test "legacy"
 
+cleanup
+log_section "Routing with nexthop objects"
+ip nexthop ls >/dev/null 2>&1
+if [ $? -eq 0 ]; then
+	WITH_VRF=no
+	setup
+	do_test "new"
+
+	cleanup
+	log_section "Routing with nexthop objects and VRF"
+	WITH_VRF=yes
+	setup
+	do_test "new"
+else
+	echo "Nexthop objects not supported; skipping tests"
+fi
+
 printf "\nTests passed: %3d\n" ${nsuccess}
 printf "Tests failed: %3d\n"   ${nfail}
 
-- 
cgit v1.2.3-59-g8ed1b


From 735ab2f65dced1f5285c50dc6dcb39dadf9eb6ad Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 8 Jun 2019 14:53:40 -0700
Subject: selftests: Add test with multiple prefixes using single nexthop

Add tests where multiple FIB entries use the same nexthop object. Generate
per-cpu cached routes for each by running ping on each cpu, and then
generate exceptions unique to each prefix (remote host) with different
mtus.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/fib_nexthop_multiprefix.sh       | 290 +++++++++++++++++++++
 1 file changed, 290 insertions(+)
 create mode 100755 tools/testing/selftests/net/fib_nexthop_multiprefix.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
new file mode 100755
index 000000000000..e6828732843e
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Validate cached routes in fib{6}_nh that is used by multiple prefixes.
+# Validate a different # exception is generated in h0 for each remote host.
+#
+#               h1
+#            /
+#    h0 - r1 -  h2
+#            \
+#               h3
+#
+# routing in h0 to hN is done with nexthop objects.
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "$out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+################################################################################
+# config
+
+create_ns()
+{
+	local ns=${1}
+
+	ip netns del ${ns} 2>/dev/null
+
+	ip netns add ${ns}
+	ip -netns ${ns} addr add 127.0.0.1/8 dev lo
+	ip -netns ${ns} link set lo up
+
+	ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+	case ${ns} in
+	h*)
+		ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+		;;
+	r*)
+		ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+		ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		;;
+	esac
+}
+
+setup()
+{
+	local ns
+	local i
+
+	#set -e
+
+	for ns in h0 r1 h1 h2 h3
+	do
+		create_ns ${ns}
+	done
+
+	#
+	# create interconnects
+	#
+
+	for i in 0 1 2 3
+	do
+		ip -netns h${i} li add eth0 type veth peer name r1h${i}
+		ip -netns h${i} li set eth0 up
+		ip -netns h${i} li set r1h${i} netns r1 name eth${i} up
+
+		ip -netns h${i}    addr add dev eth0 172.16.10${i}.1/24
+		ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64
+		ip -netns r1    addr add dev eth${i} 172.16.10${i}.254/24
+		ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64
+	done
+
+	ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0
+	ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0
+
+	# routing from h0 to h1-h3 and back
+	for i in 1 2 3
+	do
+		ip -netns h0    ro add 172.16.10${i}.0/24 nhid 4
+		ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254
+
+		ip -netns h0    -6 ro add 2001:db8:10${i}::/64 nhid 6
+		ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
+	done
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "host 1 config"
+		ip -netns h0 li sh
+		ip -netns h0 ro sh
+		ip -netns h0 -6 ro sh
+	fi
+
+	#set +e
+}
+
+cleanup()
+{
+	for n in h1 r1 h2 h3 h4
+	do
+		ip netns del ${n} 2>/dev/null
+	done
+}
+
+change_mtu()
+{
+	local hostid=$1
+	local mtu=$2
+
+	run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu}
+	run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu}
+}
+
+################################################################################
+# validate exceptions
+
+validate_v4_exception()
+{
+	local i=$1
+	local mtu=$2
+	local ping_sz=$3
+	local dst="172.16.10${i}.1"
+	local h0=172.16.100.1
+	local r1=172.16.100.254
+	local rc
+
+	if [ ${ping_sz} != "0" ]; then
+		run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst}
+	fi
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "Route get"
+		ip -netns h0 ro get ${dst}
+		echo "Searching for:"
+		echo "    cache .* mtu ${mtu}"
+		echo
+	fi
+
+	ip -netns h0 ro get ${dst} | \
+	grep -q "cache .* mtu ${mtu}"
+	rc=$?
+
+	log_test $rc 0 "IPv4: host 0 to host ${i}, mtu ${mtu}"
+}
+
+validate_v6_exception()
+{
+	local i=$1
+	local mtu=$2
+	local ping_sz=$3
+	local dst="2001:db8:10${i}::1"
+	local h0=2001:db8:100::1
+	local r1=2001:db8:100::64
+	local rc
+
+	if [ ${ping_sz} != "0" ]; then
+		run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst}
+	fi
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "Route get"
+		ip -netns h0 -6 ro get ${dst}
+		echo "Searching for:"
+		echo "    ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+		echo
+	fi
+
+	ip -netns h0 -6 ro get ${dst} | \
+	grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+	rc=$?
+
+	log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}"
+}
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=1;;
+	esac
+done
+
+cleanup
+setup
+sleep 2
+
+cpus=$(cat  /sys/devices/system/cpu/online)
+cpus="$(seq ${cpus/-/ })"
+ret=0
+for i in 1 2 3
+do
+	# generate a cached route per-cpu
+	for c in ${cpus}; do
+		run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
+		[ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
+
+		run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1
+		[ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
+
+		[ $ret -ne 0 ] && break
+	done
+	[ $ret -ne 0 ] && break
+done
+
+if [ $ret -eq 0 ]; then
+	# generate different exceptions in h0 for h1, h2 and h3
+	change_mtu 1 1300
+	validate_v4_exception 1 1300 1350
+	validate_v6_exception 1 1300 1350
+	echo
+
+	change_mtu 2 1350
+	validate_v4_exception 2 1350 1400
+	validate_v6_exception 2 1350 1400
+	echo
+
+	change_mtu 3 1400
+	validate_v4_exception 3 1400 1450
+	validate_v6_exception 3 1400 1450
+	echo
+
+	validate_v4_exception 1 1300 0
+	validate_v6_exception 1 1300 0
+	echo
+
+	validate_v4_exception 2 1350 0
+	validate_v6_exception 2 1350 0
+	echo
+
+	validate_v4_exception 3 1400 0
+	validate_v6_exception 3 1400 0
+
+	# targeted deletes to trigger cleanup paths in kernel
+	ip -netns h0 ro del 172.16.102.0/24 nhid 4
+	ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6
+
+	ip -netns h0 nexthop del id 4
+	ip -netns h0 nexthop del id 6
+fi
+
+cleanup
-- 
cgit v1.2.3-59-g8ed1b


From cab14d1087d9be915a7174e47d1f2b9fa4c78ab4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sat, 8 Jun 2019 14:53:41 -0700
Subject: selftests: Add version of router_multipath.sh using nexthop objects

Add a version of router_multipath.sh that uses nexthop objects for
routes.

Ido requested a version that does not cause regressions with mlxsw
testing since it does not support nexthop objects yet.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/router_mpath_nh.sh    | 359 +++++++++++++++++++++
 1 file changed, 359 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/router_mpath_nh.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
new file mode 100755
index 000000000000..cf3d26c233e8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -0,0 +1,359 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+	vrf_create "vrf-r1"
+	ip link set dev $rp11 master vrf-r1
+	ip link set dev $rp12 master vrf-r1
+	ip link set dev $rp13 master vrf-r1
+
+	ip link set dev vrf-r1 up
+	ip link set dev $rp11 up
+	ip link set dev $rp12 up
+	ip link set dev $rp13 up
+
+	ip address add 192.0.2.1/24 dev $rp11
+	ip address add 2001:db8:1::1/64 dev $rp11
+
+	ip address add 169.254.2.12/24 dev $rp12
+	ip address add fe80:2::12/64 dev $rp12
+
+	ip address add 169.254.3.13/24 dev $rp13
+	ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-r1
+	ip route del 198.51.100.0/24 vrf vrf-r1
+
+	ip address del fe80:3::13/64 dev $rp13
+	ip address del 169.254.3.13/24 dev $rp13
+
+	ip address del fe80:2::12/64 dev $rp12
+	ip address del 169.254.2.12/24 dev $rp12
+
+	ip address del 2001:db8:1::1/64 dev $rp11
+	ip address del 192.0.2.1/24 dev $rp11
+
+	ip nexthop del id 103
+	ip nexthop del id 101
+	ip nexthop del id 102
+	ip nexthop del id 106
+	ip nexthop del id 104
+	ip nexthop del id 105
+
+	ip link set dev $rp13 down
+	ip link set dev $rp12 down
+	ip link set dev $rp11 down
+
+	vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+	vrf_create "vrf-r2"
+	ip link set dev $rp21 master vrf-r2
+	ip link set dev $rp22 master vrf-r2
+	ip link set dev $rp23 master vrf-r2
+
+	ip link set dev vrf-r2 up
+	ip link set dev $rp21 up
+	ip link set dev $rp22 up
+	ip link set dev $rp23 up
+
+	ip address add 198.51.100.1/24 dev $rp21
+	ip address add 2001:db8:2::1/64 dev $rp21
+
+	ip address add 169.254.2.22/24 dev $rp22
+	ip address add fe80:2::22/64 dev $rp22
+
+	ip address add 169.254.3.23/24 dev $rp23
+	ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-r2
+	ip route del 192.0.2.0/24 vrf vrf-r2
+
+	ip address del fe80:3::23/64 dev $rp23
+	ip address del 169.254.3.23/24 dev $rp23
+
+	ip address del fe80:2::22/64 dev $rp22
+	ip address del 169.254.2.22/24 dev $rp22
+
+	ip address del 2001:db8:2::1/64 dev $rp21
+	ip address del 198.51.100.1/24 dev $rp21
+
+	ip nexthop del id 201
+	ip nexthop del id 202
+	ip nexthop del id 204
+	ip nexthop del id 205
+
+	ip link set dev $rp23 down
+	ip link set dev $rp22 down
+	ip link set dev $rp21 down
+
+	vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+	ip nexthop add id 101 via 169.254.2.22 dev $rp12
+	ip nexthop add id 102 via 169.254.3.23 dev $rp13
+	ip nexthop add id 103 group 101/102
+	ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+	ip nexthop add id 104 via fe80:2::22 dev $rp12
+	ip nexthop add id 105 via fe80:3::23 dev $rp13
+	ip nexthop add id 106 group 104/105
+	ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+	ip nexthop add id 201 via 169.254.2.12 dev $rp22
+	ip nexthop add id 202 via 169.254.3.13 dev $rp23
+	ip nexthop add id 203 group 201/202
+	ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+	ip nexthop add id 204 via fe80:2::12 dev $rp22
+	ip nexthop add id 205 via fe80:3::13 dev $rp23
+	ip nexthop add id 206 group 204/205
+	ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the configured weights.
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip nexthop replace id 103 group 101,$weight_rp12/102,$weight_rp13
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	# Restore settings.
+	ip nexthop replace id 103 group 101/102
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	# Transmit multiple flows from h1 to h2 and make sure they are
+	# distributed between both multipath links (rp12 and rp13)
+	# according to the configured weights.
+	sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+	ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+		-d 1msec -t udp "sp=1024,dp=0-32768"
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	ip nexthop replace id 106 group 104/105
+
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+	local packets_rp12 packets_rp13
+
+	ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
+
+	t0_rp12=$(link_stats_tx_packets_get $rp12)
+	t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+	# Generate 16384 echo requests, each with a random flow label.
+	for _ in $(seq 1 16384); do
+		ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+	done
+
+	t1_rp12=$(link_stats_tx_packets_get $rp12)
+	t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+	let "packets_rp12 = $t1_rp12 - $t0_rp12"
+	let "packets_rp13 = $t1_rp13 - $t0_rp13"
+	multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+	ip nexthop replace id 106 group 104/105
+}
+
+multipath_test()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp11=${NETIFS[p2]}
+
+	rp12=${NETIFS[p3]}
+	rp22=${NETIFS[p4]}
+
+	rp13=${NETIFS[p5]}
+	rp23=${NETIFS[p6]}
+
+	rp21=${NETIFS[p7]}
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router1_create
+	router2_create
+	routing_nh_obj
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Nexthop objects not supported; skipping tests"
+	exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-59-g8ed1b


From fada7fdc83c0bf8755956bff707c42b609223301 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Thu, 6 Jun 2019 13:59:40 -0700
Subject: bpf: Allow bpf_map_lookup_elem() on an xskmap

Currently, the AF_XDP code uses a separate map in order to
determine if an xsk is bound to a queue.  Instead of doing this,
have bpf_map_lookup_elem() return a xdp_sock.

Rearrange some xdp_sock members to eliminate structure holes.

Remove selftest - will be added back in later patch.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h                                |  8 +++++
 include/net/xdp_sock.h                             |  4 +--
 include/uapi/linux/bpf.h                           |  4 +++
 kernel/bpf/verifier.c                              | 26 ++++++++++++--
 kernel/bpf/xskmap.c                                |  7 ++++
 net/core/filter.c                                  | 40 ++++++++++++++++++++++
 .../selftests/bpf/verifier/prevent_map_lookup.c    | 15 --------
 7 files changed, 85 insertions(+), 19 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e5a309e6a400..1fe137afa898 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -280,6 +280,7 @@ enum bpf_reg_type {
 	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
 	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
+	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -727,6 +728,13 @@ void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
 void __cpu_map_flush(struct bpf_map *map);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
+bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info);
+u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog,
+				    u32 *target_size);
 
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index d074b6d60f8a..ae0f368a62bb 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -58,11 +58,11 @@ struct xdp_sock {
 	struct xdp_umem *umem;
 	struct list_head flush_node;
 	u16 queue_id;
-	struct xsk_queue *tx ____cacheline_aligned_in_smp;
-	struct list_head list;
 	bool zc;
 	/* Protects multiple processes in the control path */
 	struct mutex mutex;
+	struct xsk_queue *tx ____cacheline_aligned_in_smp;
+	struct list_head list;
 	/* Mutual exclusion of NAPI TX thread and sendmsg error paths
 	 * in the SKB destructor callback.
 	 */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7c6aef253173..ae0907d8c03a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3083,6 +3083,10 @@ struct bpf_sock_tuple {
 	};
 };
 
+struct bpf_xdp_sock {
+	__u32 queue_id;
+};
+
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5c2cb5bd84ce..8d1786357a09 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -334,7 +334,8 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)
 {
 	return type == PTR_TO_SOCKET ||
 		type == PTR_TO_SOCK_COMMON ||
-		type == PTR_TO_TCP_SOCK;
+		type == PTR_TO_TCP_SOCK ||
+		type == PTR_TO_XDP_SOCK;
 }
 
 static bool reg_type_may_be_null(enum bpf_reg_type type)
@@ -406,6 +407,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_TCP_SOCK]	= "tcp_sock",
 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
+	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 };
 
 static char slot_type_char[] = {
@@ -1363,6 +1365,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_SOCK_COMMON_OR_NULL:
 	case PTR_TO_TCP_SOCK:
 	case PTR_TO_TCP_SOCK_OR_NULL:
+	case PTR_TO_XDP_SOCK:
 		return true;
 	default:
 		return false;
@@ -1843,6 +1846,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 	case PTR_TO_TCP_SOCK:
 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
 		break;
+	case PTR_TO_XDP_SOCK:
+		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
+		break;
 	default:
 		valid = false;
 	}
@@ -2007,6 +2013,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 	case PTR_TO_TCP_SOCK:
 		pointer_desc = "tcp_sock ";
 		break;
+	case PTR_TO_XDP_SOCK:
+		pointer_desc = "xdp_sock ";
+		break;
 	default:
 		break;
 	}
@@ -2905,10 +2914,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	 * appear.
 	 */
 	case BPF_MAP_TYPE_CPUMAP:
-	case BPF_MAP_TYPE_XSKMAP:
 		if (func_id != BPF_FUNC_redirect_map)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_XSKMAP:
+		if (func_id != BPF_FUNC_redirect_map &&
+		    func_id != BPF_FUNC_map_lookup_elem)
+			goto error;
+		break;
 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 	case BPF_MAP_TYPE_HASH_OF_MAPS:
 		if (func_id != BPF_FUNC_map_lookup_elem)
@@ -3799,6 +3812,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	case PTR_TO_SOCK_COMMON_OR_NULL:
 	case PTR_TO_TCP_SOCK:
 	case PTR_TO_TCP_SOCK_OR_NULL:
+	case PTR_TO_XDP_SOCK:
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
@@ -5038,6 +5052,9 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			if (reg->map_ptr->inner_map_meta) {
 				reg->type = CONST_PTR_TO_MAP;
 				reg->map_ptr = reg->map_ptr->inner_map_meta;
+			} else if (reg->map_ptr->map_type ==
+				   BPF_MAP_TYPE_XSKMAP) {
+				reg->type = PTR_TO_XDP_SOCK;
 			} else {
 				reg->type = PTR_TO_MAP_VALUE;
 			}
@@ -6299,6 +6316,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_SOCK_COMMON_OR_NULL:
 	case PTR_TO_TCP_SOCK:
 	case PTR_TO_TCP_SOCK_OR_NULL:
+	case PTR_TO_XDP_SOCK:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
@@ -6693,6 +6711,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_SOCK_COMMON_OR_NULL:
 	case PTR_TO_TCP_SOCK:
 	case PTR_TO_TCP_SOCK_OR_NULL:
+	case PTR_TO_XDP_SOCK:
 		return false;
 	default:
 		return true;
@@ -7826,6 +7845,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		case PTR_TO_TCP_SOCK:
 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
 			break;
+		case PTR_TO_XDP_SOCK:
+			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
+			break;
 		default:
 			continue;
 		}
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 413d75f4fc72..ef7338cebd18 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -151,6 +151,12 @@ void __xsk_map_flush(struct bpf_map *map)
 }
 
 static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return __xsk_map_lookup_elem(map, *(u32 *)key);
+}
+
+static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
@@ -218,6 +224,7 @@ const struct bpf_map_ops xsk_map_ops = {
 	.map_free = xsk_map_free,
 	.map_get_next_key = xsk_map_get_next_key,
 	.map_lookup_elem = xsk_map_lookup_elem,
+	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
 	.map_update_elem = xsk_map_update_elem,
 	.map_delete_elem = xsk_map_delete_elem,
 	.map_check_btf = map_check_no_btf,
diff --git a/net/core/filter.c b/net/core/filter.c
index f2777dc0b624..a5e4ac7fcbe5 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5680,6 +5680,46 @@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
 	return INET_ECN_set_ce(skb);
 }
 
+bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
+		return false;
+
+	if (off % size != 0)
+		return false;
+
+	switch (off) {
+	default:
+		return size == sizeof(__u32);
+	}
+}
+
+u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+#define BPF_XDP_SOCK_GET(FIELD)						\
+	do {								\
+		BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) >	\
+			     FIELD_SIZEOF(struct bpf_xdp_sock, FIELD));	\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
+				      si->dst_reg, si->src_reg,		\
+				      offsetof(struct xdp_sock, FIELD)); \
+	} while (0)
+
+	switch (si->off) {
+	case offsetof(struct bpf_xdp_sock, queue_id):
+		BPF_XDP_SOCK_GET(queue_id);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
 static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
 	.func           = bpf_skb_ecn_set_ce,
 	.gpl_only       = false,
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
index bbdba990fefb..da7a4b37cb98 100644
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
@@ -28,21 +28,6 @@
 	.errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
 	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
 },
-{
-	"prevent map lookup in xskmap",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_xskmap = { 3 },
-	.result = REJECT,
-	.errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem",
-	.prog_type = BPF_PROG_TYPE_XDP,
-},
 {
 	"prevent map lookup in stack trace",
 	.insns = {
-- 
cgit v1.2.3-59-g8ed1b


From 91eda5998c313282b5acfcc0b5133556382d0737 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Thu, 6 Jun 2019 13:59:41 -0700
Subject: bpf/tools: sync bpf.h

Sync uapi/linux/bpf.h

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7c6aef253173..ae0907d8c03a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3083,6 +3083,10 @@ struct bpf_sock_tuple {
 	};
 };
 
+struct bpf_xdp_sock {
+	__u32 queue_id;
+};
+
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
-- 
cgit v1.2.3-59-g8ed1b


From 940e7be34ab3d04b7e0b6dd57bdbda1d225c3d8c Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Thu, 6 Jun 2019 13:59:42 -0700
Subject: tools/bpf: Add bpf_map_lookup_elem selftest for xskmap

Check that bpf_map_lookup_elem lookup and structure
access operats correctly.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/sock.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index b31cd2cf50d0..9ed192e14f5f 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -498,3 +498,21 @@
 	.result = REJECT,
 	.errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
 },
+{
+	"bpf_map_lookup_elem(xskmap, &key); xs->queue_id",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_xskmap = { 3 },
+	.prog_type = BPF_PROG_TYPE_XDP,
+	.result = ACCEPT,
+},
-- 
cgit v1.2.3-59-g8ed1b


From 10a13bb40e541d8cb1d5f36403aae3a0f1a2ee58 Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Thu, 6 Jun 2019 13:59:43 -0700
Subject: libbpf: remove qidconf and better support external bpf programs.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the recent change to XSKMAP bpf_map_lookup_elem() to test if
there is a xsk present in the map instead of duplicating the work
with qidconf.

Fix things so callers using XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD
bypass any internal bpf maps, so xsk_socket__{create|delete} works
properly.

Clean up error handling path.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Tested-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/xsk.c | 103 ++++++++++++++--------------------------------------
 1 file changed, 28 insertions(+), 75 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 38667b62f1fe..7ef6293b4fd7 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -60,10 +60,8 @@ struct xsk_socket {
 	struct xsk_umem *umem;
 	struct xsk_socket_config config;
 	int fd;
-	int xsks_map;
 	int ifindex;
 	int prog_fd;
-	int qidconf_map_fd;
 	int xsks_map_fd;
 	__u32 queue_id;
 	char ifname[IFNAMSIZ];
@@ -265,15 +263,11 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 	/* This is the C-program:
 	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
 	 * {
-	 *     int *qidconf, index = ctx->rx_queue_index;
+	 *     int index = ctx->rx_queue_index;
 	 *
 	 *     // A set entry here means that the correspnding queue_id
 	 *     // has an active AF_XDP socket bound to it.
-	 *     qidconf = bpf_map_lookup_elem(&qidconf_map, &index);
-	 *     if (!qidconf)
-	 *         return XDP_ABORTED;
-	 *
-	 *     if (*qidconf)
+	 *     if (bpf_map_lookup_elem(&xsks_map, &index))
 	 *         return bpf_redirect_map(&xsks_map, index, 0);
 	 *
 	 *     return XDP_PASS;
@@ -286,15 +280,10 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
 		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
 		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
-		BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd),
+		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
 		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-		BPF_MOV32_IMM(BPF_REG_0, 0),
-		/* if r1 == 0 goto +8 */
-		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
 		BPF_MOV32_IMM(BPF_REG_0, 2),
-		/* r1 = *(u32 *)(r1 + 0) */
-		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
 		/* if r1 == 0 goto +5 */
 		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
 		/* r2 = *(u32 *)(r10 - 4) */
@@ -366,18 +355,11 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 	if (max_queues < 0)
 		return max_queues;
 
-	fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map",
+	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
 				 sizeof(int), sizeof(int), max_queues, 0);
 	if (fd < 0)
 		return fd;
-	xsk->qidconf_map_fd = fd;
 
-	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
-				 sizeof(int), sizeof(int), max_queues, 0);
-	if (fd < 0) {
-		close(xsk->qidconf_map_fd);
-		return fd;
-	}
 	xsk->xsks_map_fd = fd;
 
 	return 0;
@@ -385,10 +367,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
 
 static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
 {
-	close(xsk->qidconf_map_fd);
+	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
 	close(xsk->xsks_map_fd);
-	xsk->qidconf_map_fd = -1;
-	xsk->xsks_map_fd = -1;
 }
 
 static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
@@ -417,10 +397,9 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 	if (err)
 		goto out_map_ids;
 
-	for (i = 0; i < prog_info.nr_map_ids; i++) {
-		if (xsk->qidconf_map_fd != -1 && xsk->xsks_map_fd != -1)
-			break;
+	xsk->xsks_map_fd = -1;
 
+	for (i = 0; i < prog_info.nr_map_ids; i++) {
 		fd = bpf_map_get_fd_by_id(map_ids[i]);
 		if (fd < 0)
 			continue;
@@ -431,11 +410,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 			continue;
 		}
 
-		if (!strcmp(map_info.name, "qidconf_map")) {
-			xsk->qidconf_map_fd = fd;
-			continue;
-		}
-
 		if (!strcmp(map_info.name, "xsks_map")) {
 			xsk->xsks_map_fd = fd;
 			continue;
@@ -445,40 +419,18 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
 	}
 
 	err = 0;
-	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
+	if (xsk->xsks_map_fd == -1)
 		err = -ENOENT;
-		xsk_delete_bpf_maps(xsk);
-	}
 
 out_map_ids:
 	free(map_ids);
 	return err;
 }
 
-static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
-{
-	int qid = false;
-
-	bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
-	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
-}
-
 static int xsk_set_bpf_maps(struct xsk_socket *xsk)
 {
-	int qid = true, fd = xsk->fd, err;
-
-	err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
-	if (err)
-		goto out;
-
-	err = bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, &fd, 0);
-	if (err)
-		goto out;
-
-	return 0;
-out:
-	xsk_clear_bpf_maps(xsk);
-	return err;
+	return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
+				   &xsk->fd, 0);
 }
 
 static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
@@ -497,26 +449,27 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
 			return err;
 
 		err = xsk_load_xdp_prog(xsk);
-		if (err)
-			goto out_maps;
+		if (err) {
+			xsk_delete_bpf_maps(xsk);
+			return err;
+		}
 	} else {
 		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
 		err = xsk_lookup_bpf_maps(xsk);
-		if (err)
-			goto out_load;
+		if (err) {
+			close(xsk->prog_fd);
+			return err;
+		}
 	}
 
 	err = xsk_set_bpf_maps(xsk);
-	if (err)
-		goto out_load;
+	if (err) {
+		xsk_delete_bpf_maps(xsk);
+		close(xsk->prog_fd);
+		return err;
+	}
 
 	return 0;
-
-out_load:
-	close(xsk->prog_fd);
-out_maps:
-	xsk_delete_bpf_maps(xsk);
-	return err;
 }
 
 int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
@@ -643,9 +596,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 		goto out_mmap_tx;
 	}
 
-	xsk->qidconf_map_fd = -1;
-	xsk->xsks_map_fd = -1;
-
+	xsk->prog_fd = -1;
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
 		err = xsk_setup_xdp_prog(xsk);
 		if (err)
@@ -708,8 +659,10 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	if (!xsk)
 		return;
 
-	xsk_clear_bpf_maps(xsk);
-	xsk_delete_bpf_maps(xsk);
+	if (xsk->prog_fd != -1) {
+		xsk_delete_bpf_maps(xsk);
+		close(xsk->prog_fd);
+	}
 
 	optlen = sizeof(off);
 	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
-- 
cgit v1.2.3-59-g8ed1b


From c7cebffea2c7086850e4affeb9f232d34ae30f11 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 10 Jun 2019 10:46:55 -0700
Subject: selftests/bpf: fix constness of source arg for bpf helpers

Fix signature of bpf_probe_read and bpf_probe_write_user to mark source
pointer as const. This causes warnings during compilation for
applications relying on those helpers.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/bpf_helpers.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index e6d243b7cd74..1a5b1accf091 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -31,7 +31,7 @@ static int (*bpf_map_pop_elem)(void *map, void *value) =
 	(void *) BPF_FUNC_map_pop_elem;
 static int (*bpf_map_peek_elem)(void *map, void *value) =
 	(void *) BPF_FUNC_map_peek_elem;
-static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) =
 	(void *) BPF_FUNC_probe_read;
 static unsigned long long (*bpf_ktime_get_ns)(void) =
 	(void *) BPF_FUNC_ktime_get_ns;
@@ -62,7 +62,7 @@ static int (*bpf_perf_event_output)(void *ctx, void *map,
 	(void *) BPF_FUNC_perf_event_output;
 static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
 	(void *) BPF_FUNC_get_stackid;
-static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+static int (*bpf_probe_write_user)(void *dst, const void *src, int size) =
 	(void *) BPF_FUNC_probe_write_user;
 static int (*bpf_current_task_under_cgroup)(void *map, int index) =
 	(void *) BPF_FUNC_current_task_under_cgroup;
-- 
cgit v1.2.3-59-g8ed1b


From 89cceaa939171fafa153d4bf637b39e396bbd785 Mon Sep 17 00:00:00 2001
From: Hechao Li <hechaol@fb.com>
Date: Mon, 10 Jun 2019 17:43:07 -0700
Subject: selftests/bpf : clean up feature/ when make clean

An error "implicit declaration of function 'reallocarray'" can be thrown
with the following steps:

$ cd tools/testing/selftests/bpf
$ make clean && make CC=<Path to GCC 4.8.5>
$ make clean && make CC=<Path to GCC 7.x>

The cause is that the feature folder generated by GCC 4.8.5 is not
removed, leaving feature-reallocarray being 1, which causes reallocarray
not defined when re-compliing with GCC 7.x. This diff adds feature
folder to EXTRA_CLEAN to avoid this problem.

v2: Rephrase the commit message.

Signed-off-by: Hechao Li <hechaol@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 2b426ae1cdc9..44fb61f4d502 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -279,4 +279,5 @@ $(OUTPUT)/verifier/tests.h: $(VERIFIER_TESTS_DIR) $(VERIFIER_TEST_FILES)
 		 ) > $(VERIFIER_TESTS_H))
 
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \
-	$(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H)
+	$(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \
+	feature
-- 
cgit v1.2.3-59-g8ed1b


From 6446b3155521aab37f5a2b99ac264803ea7dced8 Mon Sep 17 00:00:00 2001
From: Hechao Li <hechaol@fb.com>
Date: Mon, 10 Jun 2019 17:56:50 -0700
Subject: bpf: add a new API libbpf_num_possible_cpus()

Adding a new API libbpf_num_possible_cpus() that helps user with
per-CPU map operations.

Signed-off-by: Hechao Li <hechaol@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c   | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   | 16 ++++++++++++++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 74 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ba89d9727137..dd8b2cd5d3a7 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3827,3 +3827,60 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
 					     desc->array_offset, addr);
 	}
 }
+
+int libbpf_num_possible_cpus(void)
+{
+	static const char *fcpu = "/sys/devices/system/cpu/possible";
+	int len = 0, n = 0, il = 0, ir = 0;
+	unsigned int start = 0, end = 0;
+	static int cpus;
+	char buf[128];
+	int error = 0;
+	int fd = -1;
+
+	if (cpus > 0)
+		return cpus;
+
+	fd = open(fcpu, O_RDONLY);
+	if (fd < 0) {
+		error = errno;
+		pr_warning("Failed to open file %s: %s\n",
+			   fcpu, strerror(error));
+		return -error;
+	}
+	len = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (len <= 0) {
+		error = len ? errno : EINVAL;
+		pr_warning("Failed to read # of possible cpus from %s: %s\n",
+			   fcpu, strerror(error));
+		return -error;
+	}
+	if (len == sizeof(buf)) {
+		pr_warning("File %s size overflow\n", fcpu);
+		return -EOVERFLOW;
+	}
+	buf[len] = '\0';
+
+	for (ir = 0, cpus = 0; ir <= len; ir++) {
+		/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
+		if (buf[ir] == ',' || buf[ir] == '\0') {
+			buf[ir] = '\0';
+			n = sscanf(&buf[il], "%u-%u", &start, &end);
+			if (n <= 0) {
+				pr_warning("Failed to get # CPUs from %s\n",
+					   &buf[il]);
+				return -EINVAL;
+			} else if (n == 1) {
+				end = start;
+			}
+			cpus += end - start + 1;
+			il = ir + 1;
+		}
+	}
+	if (cpus <= 0) {
+		pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu);
+		return -EINVAL;
+	}
+	return cpus;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1af0d48178c8..2e594a0fa961 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -454,6 +454,22 @@ bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
 LIBBPF_API void
 bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
 
+/*
+ * A helper function to get the number of possible CPUs before looking up
+ * per-CPU maps. Negative errno is returned on failure.
+ *
+ * Example usage:
+ *
+ *     int ncpus = libbpf_num_possible_cpus();
+ *     if (ncpus < 0) {
+ *          // error handling
+ *     }
+ *     long values[ncpus];
+ *     bpf_map_lookup_elem(per_cpu_map_fd, key, values);
+ *
+ */
+LIBBPF_API int libbpf_num_possible_cpus(void);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 46dcda89df21..2c6d835620d2 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -172,4 +172,5 @@ LIBBPF_0.0.4 {
 		btf_dump__new;
 		btf__parse_elf;
 		bpf_object__load_xattr;
+		libbpf_num_possible_cpus;
 } LIBBPF_0.0.3;
-- 
cgit v1.2.3-59-g8ed1b


From ebb886078bae29bd60eba3157dc6edae4bbed3a1 Mon Sep 17 00:00:00 2001
From: Hechao Li <hechaol@fb.com>
Date: Mon, 10 Jun 2019 17:56:51 -0700
Subject: selftests/bpf: remove bpf_util.h from BPF C progs

Though currently there is no problem including bpf_util.h in kernel
space BPF C programs, in next patch in this stack, I will reuse
libbpf_num_possible_cpus() in bpf_util.h thus include libbpf.h in it,
which will cause BPF C programs compile error. Therefore I will first
remove bpf_util.h from all test BPF programs.

This can also make it clear that bpf_util.h is a user-space utility
while bpf_helpers.h is a kernel space utility.

Signed-off-by: Hechao Li <hechaol@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/bpf_endian.h                 | 1 +
 tools/testing/selftests/bpf/progs/sockmap_parse_prog.c   | 1 -
 tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c | 2 +-
 tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c | 1 -
 tools/testing/selftests/bpf/progs/test_sysctl_prog.c     | 5 ++++-
 5 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
index b25595ea4a78..05f036df8a4c 100644
--- a/tools/testing/selftests/bpf/bpf_endian.h
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -2,6 +2,7 @@
 #ifndef __BPF_ENDIAN__
 #define __BPF_ENDIAN__
 
+#include <linux/stddef.h>
 #include <linux/swab.h>
 
 /* LLVM's BPF target selects the endianness of the CPU
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index ed3e4a551c57..9390e0244259 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -1,6 +1,5 @@
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 #include "bpf_endian.h"
 
 int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index 65fbfdb6cd3a..e80484d98a1a 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -1,6 +1,6 @@
 #include <linux/bpf.h>
+
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 #include "bpf_endian.h"
 
 int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index bdc22be46f2e..d85c874ef25e 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -1,6 +1,5 @@
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 #include "bpf_endian.h"
 
 int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index a295cad805d7..5cbbff416998 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -8,7 +8,6 @@
 #include <linux/bpf.h>
 
 #include "bpf_helpers.h"
-#include "bpf_util.h"
 
 /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
 #define MAX_ULONG_STR_LEN 0xF
@@ -16,6 +15,10 @@
 /* Max supported length of sysctl value string (pow2). */
 #define MAX_VALUE_STR_LEN 0x40
 
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
 static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
 {
 	char tcp_mem_name[] = "net/ipv4/tcp_mem";
-- 
cgit v1.2.3-59-g8ed1b


From 4c587c196d8237169405cbb2195669dbc00fd72b Mon Sep 17 00:00:00 2001
From: Hechao Li <hechaol@fb.com>
Date: Mon, 10 Jun 2019 17:56:52 -0700
Subject: bpf: use libbpf_num_possible_cpus internally

Use the newly added bpf_num_possible_cpus() in bpftool and selftests
and remove duplicate implementations.

Signed-off-by: Hechao Li <hechaol@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/common.c             | 53 ++++------------------------------
 tools/testing/selftests/bpf/bpf_util.h | 37 ++++--------------------
 2 files changed, 10 insertions(+), 80 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index f7261fad45c1..5215e0870bcb 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -21,6 +21,7 @@
 #include <sys/vfs.h>
 
 #include <bpf.h>
+#include <libbpf.h> /* libbpf_num_possible_cpus */
 
 #include "main.h"
 
@@ -439,57 +440,13 @@ unsigned int get_page_size(void)
 
 unsigned int get_possible_cpus(void)
 {
-	static unsigned int result;
-	char buf[128];
-	long int n;
-	char *ptr;
-	int fd;
-
-	if (result)
-		return result;
-
-	fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
-	if (fd < 0) {
-		p_err("can't open sysfs possible cpus");
-		exit(-1);
-	}
-
-	n = read(fd, buf, sizeof(buf));
-	if (n < 2) {
-		p_err("can't read sysfs possible cpus");
-		exit(-1);
-	}
-	close(fd);
+	int cpus = libbpf_num_possible_cpus();
 
-	if (n == sizeof(buf)) {
-		p_err("read sysfs possible cpus overflow");
+	if (cpus < 0) {
+		p_err("Can't get # of possible cpus: %s", strerror(-cpus));
 		exit(-1);
 	}
-
-	ptr = buf;
-	n = 0;
-	while (*ptr && *ptr != '\n') {
-		unsigned int a, b;
-
-		if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
-			n += b - a + 1;
-
-			ptr = strchr(ptr, '-') + 1;
-		} else if (sscanf(ptr, "%u", &a) == 1) {
-			n++;
-		} else {
-			assert(0);
-		}
-
-		while (isdigit(*ptr))
-			ptr++;
-		if (*ptr == ',')
-			ptr++;
-	}
-
-	result = n;
-
-	return result;
+	return cpus;
 }
 
 static char *
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a29206ebbd13..ec219f84e041 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -6,44 +6,17 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <libbpf.h> /* libbpf_num_possible_cpus */
 
 static inline unsigned int bpf_num_possible_cpus(void)
 {
-	static const char *fcpu = "/sys/devices/system/cpu/possible";
-	unsigned int start, end, possible_cpus = 0;
-	char buff[128];
-	FILE *fp;
-	int len, n, i, j = 0;
+	int possible_cpus = libbpf_num_possible_cpus();
 
-	fp = fopen(fcpu, "r");
-	if (!fp) {
-		printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno));
+	if (possible_cpus < 0) {
+		printf("Failed to get # of possible cpus: '%s'!\n",
+		       strerror(-possible_cpus));
 		exit(1);
 	}
-
-	if (!fgets(buff, sizeof(buff), fp)) {
-		printf("Failed to read %s!\n", fcpu);
-		exit(1);
-	}
-
-	len = strlen(buff);
-	for (i = 0; i <= len; i++) {
-		if (buff[i] == ',' || buff[i] == '\0') {
-			buff[i] = '\0';
-			n = sscanf(&buff[j], "%u-%u", &start, &end);
-			if (n <= 0) {
-				printf("Failed to retrieve # possible CPUs!\n");
-				exit(1);
-			} else if (n == 1) {
-				end = start;
-			}
-			possible_cpus += end - start + 1;
-			j = i + 1;
-		}
-	}
-
-	fclose(fp);
-
 	return possible_cpus;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From e28799e52a0a7188374679184a32ca24921ff40e Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 6 Jun 2019 22:15:09 +0200
Subject: selftests: pmtu: Introduce list_flush_ipv6_exception test case

This test checks that route exceptions can be successfully listed and
flushed using ip -6 route {list,flush} cache.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 51 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 9e6d8b704186..269e839b747e 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -111,6 +111,10 @@
 #
 # - cleanup_ipv6_exception
 #	Same as above, but use IPv6 transport from A to B
+#
+# - list_flush_ipv6_exception
+#	Using the same topology as in pmtu_ipv6, create exceptions, and check
+#	they are shown when listing exception caches, gone after flushing them
 
 
 # Kselftest framework requirement - SKIP code is 4.
@@ -151,7 +155,8 @@ tests="
 	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation	0
 	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
 	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
-	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1"
+	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1
+	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"
 
 NS_A="ns-A"
 NS_B="ns-B"
@@ -1202,6 +1207,50 @@ run_test_nh() {
 	USE_NH=no
 }
 
+test_list_flush_ipv6_exception() {
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	dst1="${prefix6}:${b_r1}::1"
+	dst2="${prefix6}:${b_r2}::1"
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1500
+	mtu "${ns_b}"  veth_B-R1 1500
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	fail=0
+
+	# Create route exceptions
+	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
+	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
+
+	if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 2 ]; then
+		err "  can't list cached exceptions"
+		fail=1
+	fi
+
+	run_cmd ${ns_a} ip -6 route flush cache
+	sleep 1
+	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ]; then
+		err "  can't flush cached exceptions"
+		fail=1
+	fi
+
+	return ${fail}
+}
+
 usage() {
 	echo
 	echo "$0 [OPTIONS] [TEST]..."
-- 
cgit v1.2.3-59-g8ed1b


From 514fcaac371e31fe5bc6280d15f2903c37b015b8 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Tue, 11 Jun 2019 10:02:22 -0400
Subject: tc-tests: updated fw with bind actions by reference use cases

Extended fw TDC tests with use cases where actions are pre-created and
attached to a filter by reference, i.e. by action index.

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/filters/fw.json  | 144 +++++++++++++++++++++
 1 file changed, 144 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
index 3b97cfd7e0f8..6944b90d4897 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -56,6 +56,30 @@
             "$TC qdisc del dev $DEV1 ingress"
         ]
     },
+    {
+        "id": "c591",
+        "name": "Add fw filter with action ok by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet. Remove this when the behaviour is fixed.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact ok index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pass.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
     {
         "id": "affe",
         "name": "Add fw filter with action continue",
@@ -75,6 +99,30 @@
             "$TC qdisc del dev $DEV1 ingress"
         ]
     },
+    {
+        "id": "38b3",
+        "name": "Add fw filter with action continue by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet. Remove this when the behaviour is fixed.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact continue index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action continue.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
     {
         "id": "28bc",
         "name": "Add fw filter with action pipe",
@@ -94,6 +142,30 @@
             "$TC qdisc del dev $DEV1 ingress"
         ]
     },
+    {
+        "id": "6753",
+        "name": "Add fw filter with action pipe by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact pipe index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pipe.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
     {
         "id": "8da2",
         "name": "Add fw filter with action drop",
@@ -113,6 +185,30 @@
             "$TC qdisc del dev $DEV1 ingress"
         ]
     },
+    {
+        "id": "6dc6",
+        "name": "Add fw filter with action drop by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact drop index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action drop.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
     {
         "id": "9436",
         "name": "Add fw filter with action reclassify",
@@ -132,6 +228,30 @@
             "$TC qdisc del dev $DEV1 ingress"
         ]
     },
+    {
+        "id": "3bc2",
+        "name": "Add fw filter with action reclassify by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact reclassify index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action reclassify.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
     {
         "id": "95bb",
         "name": "Add fw filter with action jump 10",
@@ -151,6 +271,30 @@
             "$TC qdisc del dev $DEV1 ingress"
         ]
     },
+    {
+        "id": "36f7",
+        "name": "Add fw filter with action jump 10 by reference",
+        "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions add action gact jump 10 index 1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action jump 10.*index 1 ref 2 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "/bin/sleep 1",
+            "$TC actions del action gact index 1"
+        ]
+    },
     {
         "id": "3d74",
         "name": "Add fw filter with action goto chain 5",
-- 
cgit v1.2.3-59-g8ed1b


From 9366211f32076a6f3265f65a4851af9af22e4403 Mon Sep 17 00:00:00 2001
From: Shalom Toledo <shalomt@mellanox.com>
Date: Tue, 11 Jun 2019 18:45:12 +0300
Subject: selftests: ptp: Add Physical Hardware Clock test

Test the PTP Physical Hardware Clock functionality using the "phc_ctl" (a
part of "linuxptp").

The test contains three sub-tests:
  * "settime" test
  * "adjtime" test
  * "adjfreq" test

"settime" test:
  * set the PHC time to 0 seconds.
  * wait for 120.5 seconds.
  * check if PHC time equal to 120.XX seconds.

"adjtime" test:
  * set the PHC time to 0 seconds.
  * adjust the time by 10 seconds.
  * check if PHC time equal to 10.XX seconds.

"adjfreq" test:
  * adjust the PHC frequency to be 1% faster.
  * set the PHC time to 0 seconds.
  * wait for 100.5 seconds.
  * check if PHC time equal to 101.XX seconds.

Usage:
  $ ./phc.sh /dev/ptp<X>

  It is possible to run a subset of the tests, for example:
    * To run only the "settime" test:
      $ TESTS="settime" ./phc.sh /dev/ptp<X>

Signed-off-by: Shalom Toledo <shalomt@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Tested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/ptp/phc.sh | 166 +++++++++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100755 tools/testing/selftests/ptp/phc.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/ptp/phc.sh b/tools/testing/selftests/ptp/phc.sh
new file mode 100755
index 000000000000..ac6e5a6e1d3a
--- /dev/null
+++ b/tools/testing/selftests/ptp/phc.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	settime
+	adjtime
+	adjfreq
+"
+DEV=$1
+
+##############################################################################
+# Sanity checks
+
+if [[ "$(id -u)" -ne 0 ]]; then
+	echo "SKIP: need root privileges"
+	exit 0
+fi
+
+if [[ "$DEV" == "" ]]; then
+	echo "SKIP: PTP device not provided"
+	exit 0
+fi
+
+require_command()
+{
+	local cmd=$1; shift
+
+	if [[ ! -x "$(command -v "$cmd")" ]]; then
+		echo "SKIP: $cmd not installed"
+		exit 1
+	fi
+}
+
+phc_sanity()
+{
+	phc_ctl $DEV get &> /dev/null
+
+	if [ $? != 0 ]; then
+		echo "SKIP: unknown clock $DEV: No such device"
+		exit 1
+	fi
+}
+
+require_command phc_ctl
+phc_sanity
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+	local err=$1
+
+	if [[ $RET -eq 0 && $err -ne 0 ]]; then
+		RET=$err
+	fi
+}
+
+log_test()
+{
+	local test_name=$1
+
+	if [[ $RET -ne 0 ]]; then
+		EXIT_STATUS=1
+		printf "TEST: %-60s  [FAIL]\n" "$test_name"
+		return 1
+	fi
+
+	printf "TEST: %-60s  [ OK ]\n" "$test_name"
+	return 0
+}
+
+tests_run()
+{
+	local current_test
+
+	for current_test in ${TESTS:-$ALL_TESTS}; do
+		$current_test
+	done
+}
+
+##############################################################################
+# Tests
+
+settime_do()
+{
+	local res
+
+	res=$(phc_ctl $DEV set 0 wait 120.5 get 2> /dev/null \
+		| awk '/clock time is/{print $5}' \
+		| awk -F. '{print $1}')
+
+	(( res == 120 ))
+}
+
+adjtime_do()
+{
+	local res
+
+	res=$(phc_ctl $DEV set 0 adj 10 get 2> /dev/null \
+		| awk '/clock time is/{print $5}' \
+		| awk -F. '{print $1}')
+
+	(( res == 10 ))
+}
+
+adjfreq_do()
+{
+	local res
+
+	# Set the clock to be 1% faster
+	res=$(phc_ctl $DEV freq 10000000 set 0 wait 100.5 get 2> /dev/null \
+		| awk '/clock time is/{print $5}' \
+		| awk -F. '{print $1}')
+
+	(( res == 101 ))
+}
+
+##############################################################################
+
+cleanup()
+{
+	phc_ctl $DEV freq 0.0 &> /dev/null
+	phc_ctl $DEV set &> /dev/null
+}
+
+settime()
+{
+	RET=0
+
+	settime_do
+	check_err $?
+	log_test "settime"
+	cleanup
+}
+
+adjtime()
+{
+	RET=0
+
+	adjtime_do
+	check_err $?
+	log_test "adjtime"
+	cleanup
+}
+
+adjfreq()
+{
+	RET=0
+
+	adjfreq_do
+	check_err $?
+	log_test "adjfreq"
+	cleanup
+}
+
+trap cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-59-g8ed1b


From cabd3e889b5772a0dc448ddca9a4b23591640bca Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 13 Jun 2019 10:00:21 +0300
Subject: selftests/bpf: signedness bug in enable_all_controllers()

The "len" variable needs to be signed for the error handling to work
properly.

Fixes: 596092ef8bea ("selftests/bpf: enable all available cgroup v2 controllers")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/cgroup_helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 0d89f0396be4..e95c33e333a4 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -47,7 +47,7 @@ int enable_all_controllers(char *cgroup_path)
 	char buf[PATH_MAX];
 	char *c, *c2;
 	int fd, cfd;
-	size_t len;
+	ssize_t len;
 
 	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
 	fd = open(path, O_RDONLY);
-- 
cgit v1.2.3-59-g8ed1b


From e55d54f43d3f11718d9e2800225097fda498e141 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 12 Jun 2019 22:04:57 -0700
Subject: libbpf: fix check for presence of associated BTF for map creation

Kernel internally checks that either key or value type ID is specified,
before using btf_fd. Do the same in libbpf's map creation code for
determining when to retry map creation w/o BTF.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: fba01a0689a9 ("libbpf: use negative fd to specify missing BTF")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index dd8b2cd5d3a7..e725fa86b189 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1736,7 +1736,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 		create_attr.key_size = def->key_size;
 		create_attr.value_size = def->value_size;
 		create_attr.max_entries = def->max_entries;
-		create_attr.btf_fd = -1;
+		create_attr.btf_fd = 0;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
 		if (bpf_map_type__is_map_in_map(def->type) &&
@@ -1750,11 +1750,12 @@ bpf_object__create_maps(struct bpf_object *obj)
 		}
 
 		*pfd = bpf_create_map_xattr(&create_attr);
-		if (*pfd < 0 && create_attr.btf_fd >= 0) {
+		if (*pfd < 0 && (create_attr.btf_key_type_id ||
+				 create_attr.btf_value_type_id)) {
 			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
 				   map->name, cp, errno);
-			create_attr.btf_fd = -1;
+			create_attr.btf_fd = 0;
 			create_attr.btf_key_type_id = 0;
 			create_attr.btf_value_type_id = 0;
 			map->btf_key_type_id = 0;
@@ -2045,7 +2046,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 	load_attr.license = license;
 	load_attr.kern_version = kern_version;
 	load_attr.prog_ifindex = prog->prog_ifindex;
-	load_attr.prog_btf_fd = prog->btf_fd;
+	load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
 	load_attr.func_info = prog->func_info;
 	load_attr.func_info_rec_size = prog->func_info_rec_size;
 	load_attr.func_info_cnt = prog->func_info_cnt;
-- 
cgit v1.2.3-59-g8ed1b


From 13a748ea6df1fee13fdcbd9d10a3d70f9e188b48 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 13 Jun 2019 15:00:03 -0700
Subject: bpf: Sync asm-generic/socket.h to tools/

SO_DETACH_REUSEPORT_BPF is needed for the test in the next patch.
It is defined in the socket.h.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/asm-generic/socket.h | 147 ++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 tools/include/uapi/asm-generic/socket.h

(limited to 'tools')

diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
new file mode 100644
index 000000000000..77f7c1638eb1
--- /dev/null
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_SOCKET_H
+#define __ASM_GENERIC_SOCKET_H
+
+#include <linux/posix_types.h>
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	1
+
+#define SO_DEBUG	1
+#define SO_REUSEADDR	2
+#define SO_TYPE		3
+#define SO_ERROR	4
+#define SO_DONTROUTE	5
+#define SO_BROADCAST	6
+#define SO_SNDBUF	7
+#define SO_RCVBUF	8
+#define SO_SNDBUFFORCE	32
+#define SO_RCVBUFFORCE	33
+#define SO_KEEPALIVE	9
+#define SO_OOBINLINE	10
+#define SO_NO_CHECK	11
+#define SO_PRIORITY	12
+#define SO_LINGER	13
+#define SO_BSDCOMPAT	14
+#define SO_REUSEPORT	15
+#ifndef SO_PASSCRED /* powerpc only differs in these */
+#define SO_PASSCRED	16
+#define SO_PEERCRED	17
+#define SO_RCVLOWAT	18
+#define SO_SNDLOWAT	19
+#define SO_RCVTIMEO_OLD	20
+#define SO_SNDTIMEO_OLD	21
+#endif
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	23
+#define SO_SECURITY_ENCRYPTION_NETWORK		24
+
+#define SO_BINDTODEVICE	25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER	26
+#define SO_DETACH_FILTER	27
+#define SO_GET_FILTER		SO_ATTACH_FILTER
+
+#define SO_PEERNAME		28
+
+#define SO_ACCEPTCONN		30
+
+#define SO_PEERSEC		31
+#define SO_PASSSEC		34
+
+#define SO_MARK			36
+
+#define SO_PROTOCOL		38
+#define SO_DOMAIN		39
+
+#define SO_RXQ_OVFL             40
+
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS	SO_WIFI_STATUS
+#define SO_PEEK_OFF		42
+
+/* Instruct lower device to use last 4-bytes of skb data as FCS */
+#define SO_NOFCS		43
+
+#define SO_LOCK_FILTER		44
+
+#define SO_SELECT_ERR_QUEUE	45
+
+#define SO_BUSY_POLL		46
+
+#define SO_MAX_PACING_RATE	47
+
+#define SO_BPF_EXTENSIONS	48
+
+#define SO_INCOMING_CPU		49
+
+#define SO_ATTACH_BPF		50
+#define SO_DETACH_BPF		SO_DETACH_FILTER
+
+#define SO_ATTACH_REUSEPORT_CBPF	51
+#define SO_ATTACH_REUSEPORT_EBPF	52
+
+#define SO_CNX_ADVICE		53
+
+#define SCM_TIMESTAMPING_OPT_STATS	54
+
+#define SO_MEMINFO		55
+
+#define SO_INCOMING_NAPI_ID	56
+
+#define SO_COOKIE		57
+
+#define SCM_TIMESTAMPING_PKTINFO	58
+
+#define SO_PEERGROUPS		59
+
+#define SO_ZEROCOPY		60
+
+#define SO_TXTIME		61
+#define SCM_TXTIME		SO_TXTIME
+
+#define SO_BINDTOIFINDEX	62
+
+#define SO_TIMESTAMP_OLD        29
+#define SO_TIMESTAMPNS_OLD      35
+#define SO_TIMESTAMPING_OLD     37
+
+#define SO_TIMESTAMP_NEW        63
+#define SO_TIMESTAMPNS_NEW      64
+#define SO_TIMESTAMPING_NEW     65
+
+#define SO_RCVTIMEO_NEW         66
+#define SO_SNDTIMEO_NEW         67
+
+#define SO_DETACH_REUSEPORT_BPF 68
+
+#if !defined(__KERNEL__)
+
+#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+/* on 64-bit and x32, avoid the ?: operator */
+#define SO_TIMESTAMP		SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
+
+#define SO_RCVTIMEO		SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO		SO_SNDTIMEO_OLD
+#else
+#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
+#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
+
+#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
+#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
+#endif
+
+#define SCM_TIMESTAMP           SO_TIMESTAMP
+#define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
+#define SCM_TIMESTAMPING        SO_TIMESTAMPING
+
+#endif
+
+#endif /* __ASM_GENERIC_SOCKET_H */
-- 
cgit v1.2.3-59-g8ed1b


From d30bd78ce8ba1f093aa4877ed06601d0debd872b Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Thu, 13 Jun 2019 15:00:05 -0700
Subject: bpf: Add test for SO_REUSEPORT_DETACH_BPF

This patch adds a test for the new sockopt SO_REUSEPORT_DETACH_BPF.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../testing/selftests/bpf/test_select_reuseport.c  | 54 ++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
index 75646d9b34aa..7566c13eb51a 100644
--- a/tools/testing/selftests/bpf/test_select_reuseport.c
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -523,6 +523,58 @@ static void test_pass_on_err(int type, sa_family_t family)
 	printf("OK\n");
 }
 
+static void test_detach_bpf(int type, sa_family_t family)
+{
+#ifdef SO_DETACH_REUSEPORT_BPF
+	__u32 nr_run_before = 0, nr_run_after = 0, tmp, i;
+	struct epoll_event ev;
+	int cli_fd, err, nev;
+	struct cmd cmd = {};
+	int optvalue = 0;
+
+	printf("%s: ", __func__);
+	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
+			 &optvalue, sizeof(optvalue));
+	CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+	      "err:%d errno:%d\n", err, errno);
+
+	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
+			 &optvalue, sizeof(optvalue));
+	CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+	      "err:%d errno:%d\n", err, errno);
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		err = bpf_map_lookup_elem(result_map, &i, &tmp);
+		CHECK(err == -1, "lookup_elem(result_map)",
+		      "i:%u err:%d errno:%d\n", i, err, errno);
+		nr_run_before += tmp;
+	}
+
+	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
+	nev = epoll_wait(epfd, &ev, 1, 5);
+	CHECK(nev <= 0, "nev <= 0",
+	      "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
+	      nev,  type, family);
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		err = bpf_map_lookup_elem(result_map, &i, &tmp);
+		CHECK(err == -1, "lookup_elem(result_map)",
+		      "i:%u err:%d errno:%d\n", i, err, errno);
+		nr_run_after += tmp;
+	}
+
+	CHECK(nr_run_before != nr_run_after,
+	      "nr_run_before != nr_run_after",
+	      "nr_run_before:%u nr_run_after:%u\n",
+	      nr_run_before, nr_run_after);
+
+	printf("OK\n");
+	close(cli_fd);
+#else
+	printf("%s: SKIP\n", __func__);
+#endif
+}
+
 static void prepare_sk_fds(int type, sa_family_t family, bool inany)
 {
 	const int first = REUSEPORT_ARRAY_SIZE - 1;
@@ -664,6 +716,8 @@ static void test_all(void)
 			test_pass(type, family);
 			test_syncookie(type, family);
 			test_pass_on_err(type, family);
+			/* Must be the last test */
+			test_detach_bpf(type, family);
 
 			cleanup_per_test();
 			printf("\n");
-- 
cgit v1.2.3-59-g8ed1b


From cd17d77705780e2270937fb3cbd2b985adab3edc Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 12 Jun 2019 10:30:39 -0700
Subject: bpf/tools: sync bpf.h

Add sk to struct bpf_sock_addr and struct bpf_sock_ops.

Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ae0907d8c03a..d0a23476f887 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3247,6 +3247,7 @@ struct bpf_sock_addr {
 	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
 				 * Stored in network byte order.
 				 */
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -3298,6 +3299,7 @@ struct bpf_sock_ops {
 	__u32 sk_txhash;
 	__u64 bytes_received;
 	__u64 bytes_acked;
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 /* Definitions for bpf_sock_ops_cb_flags */
-- 
cgit v1.2.3-59-g8ed1b


From 69d96519dbf0bfa1868dc8597d4b9b2cdeb009d7 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 12 Jun 2019 10:30:40 -0700
Subject: selftests/bpf: convert socket_cookie test to sk storage

This lets us test that both BPF_PROG_TYPE_CGROUP_SOCK_ADDR and
BPF_PROG_TYPE_SOCK_OPS can access underlying bpf_sock.

Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/progs/socket_cookie_prog.c       | 46 ++++++++++++++--------
 tools/testing/selftests/bpf/test_socket_cookie.c   | 24 +++++------
 2 files changed, 38 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
index 9ff8ac4b0bf6..0db15c3210ad 100644
--- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
@@ -7,25 +7,35 @@
 #include "bpf_helpers.h"
 #include "bpf_endian.h"
 
+struct socket_cookie {
+	__u64 cookie_key;
+	__u32 cookie_value;
+};
+
 struct bpf_map_def SEC("maps") socket_cookies = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u64),
-	.value_size = sizeof(__u32),
-	.max_entries = 1 << 8,
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct socket_cookie),
+	.map_flags = BPF_F_NO_PREALLOC,
 };
 
+BPF_ANNOTATE_KV_PAIR(socket_cookies, int, struct socket_cookie);
+
 SEC("cgroup/connect6")
 int set_cookie(struct bpf_sock_addr *ctx)
 {
-	__u32 cookie_value = 0xFF;
-	__u64 cookie_key;
+	struct socket_cookie *p;
 
 	if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
 		return 1;
 
-	cookie_key = bpf_get_socket_cookie(ctx);
-	if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
-		return 0;
+	p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0,
+			       BPF_SK_STORAGE_GET_F_CREATE);
+	if (!p)
+		return 1;
+
+	p->cookie_value = 0xFF;
+	p->cookie_key = bpf_get_socket_cookie(ctx);
 
 	return 1;
 }
@@ -33,9 +43,8 @@ int set_cookie(struct bpf_sock_addr *ctx)
 SEC("sockops")
 int update_cookie(struct bpf_sock_ops *ctx)
 {
-	__u32 new_cookie_value;
-	__u32 *cookie_value;
-	__u64 cookie_key;
+	struct bpf_sock *sk;
+	struct socket_cookie *p;
 
 	if (ctx->family != AF_INET6)
 		return 1;
@@ -43,14 +52,17 @@ int update_cookie(struct bpf_sock_ops *ctx)
 	if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
 		return 1;
 
-	cookie_key = bpf_get_socket_cookie(ctx);
+	if (!ctx->sk)
+		return 1;
+
+	p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0);
+	if (!p)
+		return 1;
 
-	cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
-	if (!cookie_value)
+	if (p->cookie_key != bpf_get_socket_cookie(ctx))
 		return 1;
 
-	new_cookie_value = (ctx->local_port << 8) | *cookie_value;
-	bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
+	p->cookie_value = (ctx->local_port << 8) | p->cookie_value;
 
 	return 1;
 }
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index cac8ee57a013..15653b0e26eb 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -18,6 +18,11 @@
 #define CG_PATH			"/foo"
 #define SOCKET_COOKIE_PROG	"./socket_cookie_prog.o"
 
+struct socket_cookie {
+	__u64 cookie_key;
+	__u32 cookie_value;
+};
+
 static int start_server(void)
 {
 	struct sockaddr_in6 addr;
@@ -89,8 +94,7 @@ static int validate_map(struct bpf_map *map, int client_fd)
 	__u32 cookie_expected_value;
 	struct sockaddr_in6 addr;
 	socklen_t len = sizeof(addr);
-	__u32 cookie_value;
-	__u64 cookie_key;
+	struct socket_cookie val;
 	int err = 0;
 	int map_fd;
 
@@ -101,17 +105,7 @@ static int validate_map(struct bpf_map *map, int client_fd)
 
 	map_fd = bpf_map__fd(map);
 
-	err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
-	if (err) {
-		log_err("Can't get cookie key from map");
-		goto out;
-	}
-
-	err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
-	if (err) {
-		log_err("Can't get cookie value from map");
-		goto out;
-	}
+	err = bpf_map_lookup_elem(map_fd, &client_fd, &val);
 
 	err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
 	if (err) {
@@ -120,8 +114,8 @@ static int validate_map(struct bpf_map *map, int client_fd)
 	}
 
 	cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
-	if (cookie_value != cookie_expected_value) {
-		log_err("Unexpected value in map: %x != %x", cookie_value,
+	if (val.cookie_value != cookie_expected_value) {
+		log_err("Unexpected value in map: %x != %x", val.cookie_value,
 			cookie_expected_value);
 		goto err;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From a51486266c3ba8e035a47fa96df67f274fe0c7d0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 15 Jun 2019 11:03:49 +0200
Subject: net: sched: remove NET_CLS_IND config option

This config option makes only couple of lines optional.
Two small helpers and an int in couple of cls structs.

Remove the config option and always compile this in.
This saves the user from unexpected surprises when he adds
a filter with ingress device match which is silently ignored
in case the config option is not set.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/mips/configs/malta_defconfig           |  1 -
 arch/mips/configs/malta_kvm_defconfig       |  1 -
 arch/mips/configs/malta_kvm_guest_defconfig |  1 -
 arch/mips/configs/malta_qemu_32r6_defconfig |  1 -
 arch/mips/configs/maltaaprp_defconfig       |  1 -
 arch/mips/configs/maltasmvp_defconfig       |  1 -
 arch/mips/configs/maltasmvp_eva_defconfig   |  1 -
 arch/mips/configs/maltaup_defconfig         |  1 -
 arch/mips/configs/maltaup_xpa_defconfig     |  1 -
 arch/mips/configs/rb532_defconfig           |  1 -
 arch/powerpc/configs/ppc6xx_defconfig       |  1 -
 arch/sh/configs/se7712_defconfig            |  1 -
 arch/sh/configs/se7721_defconfig            |  1 -
 arch/sh/configs/titan_defconfig             |  1 -
 include/net/pkt_cls.h                       |  5 +----
 include/uapi/linux/pkt_cls.h                |  2 +-
 net/sched/Kconfig                           |  8 --------
 net/sched/cls_flower.c                      |  3 +--
 net/sched/cls_fw.c                          | 13 -------------
 net/sched/cls_u32.c                         | 15 ---------------
 tools/include/uapi/linux/pkt_cls.h          |  2 +-
 tools/testing/selftests/tc-testing/config   |  1 -
 22 files changed, 4 insertions(+), 59 deletions(-)

(limited to 'tools')

diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig
index 0ee5e677662e..0de92ac1ca64 100644
--- a/arch/mips/configs/malta_defconfig
+++ b/arch/mips/configs/malta_defconfig
@@ -210,7 +210,6 @@ CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
 CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig
index 041bffac043b..efc3abace048 100644
--- a/arch/mips/configs/malta_kvm_defconfig
+++ b/arch/mips/configs/malta_kvm_defconfig
@@ -215,7 +215,6 @@ CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
 CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig
index 511065e62182..c6ceeca4394d 100644
--- a/arch/mips/configs/malta_kvm_guest_defconfig
+++ b/arch/mips/configs/malta_kvm_guest_defconfig
@@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
 CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig
index 299088043164..e6c600dc1814 100644
--- a/arch/mips/configs/malta_qemu_32r6_defconfig
+++ b/arch/mips/configs/malta_qemu_32r6_defconfig
@@ -74,7 +74,6 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig
index 2b4b3a24f637..82b44b774553 100644
--- a/arch/mips/configs/maltaaprp_defconfig
+++ b/arch/mips/configs/maltaaprp_defconfig
@@ -76,7 +76,6 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index 425ddfd7cd78..4190fc6189a0 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig
@@ -77,7 +77,6 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig
index 8beaa7ba1e52..a13c10e910ec 100644
--- a/arch/mips/configs/maltasmvp_eva_defconfig
+++ b/arch/mips/configs/maltasmvp_eva_defconfig
@@ -78,7 +78,6 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig
index 6e8b95ceb54a..b35f1fc690fb 100644
--- a/arch/mips/configs/maltaup_defconfig
+++ b/arch/mips/configs/maltaup_defconfig
@@ -75,7 +75,6 @@ CONFIG_NET_CLS_RSVP=m
 CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_ACT_POLICE=y
-CONFIG_NET_CLS_IND=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig
index 6c026db96ff9..56861aef2756 100644
--- a/arch/mips/configs/maltaup_xpa_defconfig
+++ b/arch/mips/configs/maltaup_xpa_defconfig
@@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
 CONFIG_MAC80211_MESH=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 50632a3103dd..864c70fbe668 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -103,7 +103,6 @@ CONFIG_GACT_PROB=y
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_NET_ACT_IPT=m
 CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_CLS_IND=y
 CONFIG_HAMRADIO=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index 7c6baf6df139..aa51b9b66fa2 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -301,7 +301,6 @@ CONFIG_NET_ACT_NAT=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
 CONFIG_IRDA=m
 CONFIG_IRLAN=m
 CONFIG_IRNET=m
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index 5a1097641247..1e116529735f 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -63,7 +63,6 @@ CONFIG_NET_SCH_NETEM=y
 CONFIG_NET_CLS_TCINDEX=y
 CONFIG_NET_CLS_ROUTE4=y
 CONFIG_NET_CLS_FW=y
-CONFIG_NET_CLS_IND=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index 9c0ef13bee10..c66e512719ab 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -62,7 +62,6 @@ CONFIG_NET_SCH_NETEM=y
 CONFIG_NET_CLS_TCINDEX=y
 CONFIG_NET_CLS_ROUTE4=y
 CONFIG_NET_CLS_FW=y
-CONFIG_NET_CLS_IND=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig
index 822fa9e96f74..171ab05ce4fc 100644
--- a/arch/sh/configs/titan_defconfig
+++ b/arch/sh/configs/titan_defconfig
@@ -142,7 +142,6 @@ CONFIG_GACT_PROB=y
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_NET_ACT_IPT=m
 CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_CLS_IND=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_FW_LOADER=m
 CONFIG_CONNECTOR=m
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 514e3c80ecc1..720f2b32fc2f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -7,6 +7,7 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 #include <net/flow_offload.h>
+#include <net/net_namespace.h>
 
 /* TC action not accessible from user space */
 #define TC_ACT_REINSERT		(TC_ACT_VALUE_MAX + 1)
@@ -576,9 +577,6 @@ static inline int tcf_valid_offset(const struct sk_buff *skb,
 		      (ptr <= (ptr + len)));
 }
 
-#ifdef CONFIG_NET_CLS_IND
-#include <net/net_namespace.h>
-
 static inline int
 tcf_change_indev(struct net *net, struct nlattr *indev_tlv,
 		 struct netlink_ext_ack *extack)
@@ -605,7 +603,6 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 		return false;
 	return ifindex == skb->skb_iif;
 }
-#endif /* CONFIG_NET_CLS_IND */
 
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts);
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index a93680fc4bfa..8cc6b6777b3c 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -295,7 +295,7 @@ enum {
 	TCA_FW_UNSPEC,
 	TCA_FW_CLASSID,
 	TCA_FW_POLICE,
-	TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
+	TCA_FW_INDEV,
 	TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
 	TCA_FW_MASK,
 	__TCA_FW_MAX
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index d104f7ee26c7..360fdd3eaa77 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -941,14 +941,6 @@ config NET_IFE_SKBTCINDEX
         tristate "Support to encoding decoding skb tcindex on IFE action"
         depends on NET_ACT_IFE
 
-config NET_CLS_IND
-	bool "Incoming device classification"
-	depends on NET_CLS_U32 || NET_CLS_FW
-	---help---
-	  Say Y here to extend the u32 and fw classifier to support
-	  classification based on the incoming device. This option is
-	  likely to disappear in favour of the metadata ematch.
-
 endif # NET_SCHED
 
 config NET_SCH_FIFO
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c388372df0e2..84c7f279855b 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1010,7 +1010,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 {
 	__be16 ethertype;
 	int ret = 0;
-#ifdef CONFIG_NET_CLS_IND
+
 	if (tb[TCA_FLOWER_INDEV]) {
 		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
 		if (err < 0)
@@ -1018,7 +1018,6 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		key->indev_ifindex = err;
 		mask->indev_ifindex = 0xffffffff;
 	}
-#endif
 
 	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
 		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 4dab833f66cb..c9496c920d6f 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -8,9 +8,6 @@
  * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
  * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filter (and kernel).
  * Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
- *
- * JHS: We should remove the CONFIG_NET_CLS_IND from here
- * eventually when the meta match extension is made available
  */
 
 #include <linux/module.h>
@@ -37,9 +34,7 @@ struct fw_filter {
 	struct fw_filter __rcu	*next;
 	u32			id;
 	struct tcf_result	res;
-#ifdef CONFIG_NET_CLS_IND
 	int			ifindex;
-#endif /* CONFIG_NET_CLS_IND */
 	struct tcf_exts		exts;
 	struct tcf_proto	*tp;
 	struct rcu_work		rwork;
@@ -67,10 +62,8 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		     f = rcu_dereference_bh(f->next)) {
 			if (f->id == id) {
 				*res = f->res;
-#ifdef CONFIG_NET_CLS_IND
 				if (!tcf_match_indev(skb, f->ifindex))
 					continue;
-#endif /* CONFIG_NET_CLS_IND */
 				r = tcf_exts_exec(skb, &f->exts, res);
 				if (r < 0)
 					continue;
@@ -222,7 +215,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 		tcf_bind_filter(tp, &f->res, base);
 	}
 
-#ifdef CONFIG_NET_CLS_IND
 	if (tb[TCA_FW_INDEV]) {
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
@@ -230,7 +222,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 			return ret;
 		f->ifindex = ret;
 	}
-#endif /* CONFIG_NET_CLS_IND */
 
 	err = -EINVAL;
 	if (tb[TCA_FW_MASK]) {
@@ -276,9 +267,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 
 		fnew->id = f->id;
 		fnew->res = f->res;
-#ifdef CONFIG_NET_CLS_IND
 		fnew->ifindex = f->ifindex;
-#endif /* CONFIG_NET_CLS_IND */
 		fnew->tp = f->tp;
 
 		err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT,
@@ -405,14 +394,12 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	if (f->res.classid &&
 	    nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid))
 		goto nla_put_failure;
-#ifdef CONFIG_NET_CLS_IND
 	if (f->ifindex) {
 		struct net_device *dev;
 		dev = __dev_get_by_index(net, f->ifindex);
 		if (dev && nla_put_string(skb, TCA_FW_INDEV, dev->name))
 			goto nla_put_failure;
 	}
-#endif /* CONFIG_NET_CLS_IND */
 	if (head->mask != 0xFFFFFFFF &&
 	    nla_put_u32(skb, TCA_FW_MASK, head->mask))
 		goto nla_put_failure;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index c7727de5e073..be9e46c77e8b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -20,9 +20,6 @@
  *	pure RSVP doesn't need such a general approach and can use
  *	much simpler (and faster) schemes, sort of cls_rsvp.c.
  *
- *	JHS: We should remove the CONFIG_NET_CLS_IND from here
- *	eventually when the meta match extension is made available
- *
  *	nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
  */
 
@@ -48,9 +45,7 @@ struct tc_u_knode {
 	u32			handle;
 	struct tc_u_hnode __rcu	*ht_up;
 	struct tcf_exts		exts;
-#ifdef CONFIG_NET_CLS_IND
 	int			ifindex;
-#endif
 	u8			fshift;
 	struct tcf_result	res;
 	struct tc_u_hnode __rcu	*ht_down;
@@ -176,12 +171,10 @@ check_terminal:
 			if (n->sel.flags & TC_U32_TERMINAL) {
 
 				*res = n->res;
-#ifdef CONFIG_NET_CLS_IND
 				if (!tcf_match_indev(skb, n->ifindex)) {
 					n = rcu_dereference_bh(n->next);
 					goto next_knode;
 				}
-#endif
 #ifdef CONFIG_CLS_U32_PERF
 				__this_cpu_inc(n->pf->rhit);
 #endif
@@ -761,7 +754,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 		tcf_bind_filter(tp, &n->res, base);
 	}
 
-#ifdef CONFIG_NET_CLS_IND
 	if (tb[TCA_U32_INDEV]) {
 		int ret;
 		ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
@@ -769,7 +761,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 			return -EINVAL;
 		n->ifindex = ret;
 	}
-#endif
 	return 0;
 }
 
@@ -817,9 +808,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 	new->handle = n->handle;
 	RCU_INIT_POINTER(new->ht_up, n->ht_up);
 
-#ifdef CONFIG_NET_CLS_IND
 	new->ifindex = n->ifindex;
-#endif
 	new->fshift = n->fshift;
 	new->res = n->res;
 	new->flags = n->flags;
@@ -1351,14 +1340,12 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		if (tcf_exts_dump(skb, &n->exts) < 0)
 			goto nla_put_failure;
 
-#ifdef CONFIG_NET_CLS_IND
 		if (n->ifindex) {
 			struct net_device *dev;
 			dev = __dev_get_by_index(net, n->ifindex);
 			if (dev && nla_put_string(skb, TCA_U32_INDEV, dev->name))
 				goto nla_put_failure;
 		}
-#endif
 #ifdef CONFIG_CLS_U32_PERF
 		gpf = kzalloc(sizeof(struct tc_u32_pcnt) +
 			      n->sel.nkeys * sizeof(u64),
@@ -1422,9 +1409,7 @@ static int __init init_u32(void)
 #ifdef CONFIG_CLS_U32_PERF
 	pr_info("    Performance counters on\n");
 #endif
-#ifdef CONFIG_NET_CLS_IND
 	pr_info("    input device check on\n");
-#endif
 #ifdef CONFIG_NET_CLS_ACT
 	pr_info("    Actions configured\n");
 #endif
diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h
index 401d0c1e612d..12153771396a 100644
--- a/tools/include/uapi/linux/pkt_cls.h
+++ b/tools/include/uapi/linux/pkt_cls.h
@@ -257,7 +257,7 @@ enum {
 	TCA_FW_UNSPEC,
 	TCA_FW_CLASSID,
 	TCA_FW_POLICE,
-	TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
+	TCA_FW_INDEV,
 	TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
 	TCA_FW_MASK,
 	__TCA_FW_MAX
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index b235efd55367..1adc4f9bb795 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -45,5 +45,4 @@ CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_IFE_SKBMARK=m
 CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
-CONFIG_NET_CLS_IND=y
 CONFIG_NET_SCH_FIFO=y
-- 
cgit v1.2.3-59-g8ed1b


From 92c1a19e2fb9dc3194de123009cd8d418f1e2a3b Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Sat, 15 Jun 2019 15:41:43 -0400
Subject: tc-tests: added path to ip command in tdc

This macro $IP will be used in upcoming tc tests, which require
to create interfaces etc.

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc_config.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index 942c70c041be..b771d4c89621 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -10,6 +10,8 @@ Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
 NAMES = {
           # Substitute your own tc path here
           'TC': '/sbin/tc',
+          # Substitute your own ip path here
+          'IP': '/sbin/ip',
           # Name of veth devices to be created for the namespace
           'DEV0': 'v0p0',
           'DEV1': 'v0p1',
-- 
cgit v1.2.3-59-g8ed1b


From 1718132587e8c73c9a6fd7cfbf0fa9f14559c134 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Sat, 15 Jun 2019 16:25:50 -0400
Subject: tc-tests: updated skbedit tests

- Added index upper bound test case
- Added mark upper bound test case
- Re-worded descriptions to few cases for clarity

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/skbedit.json       | 62 ++++++++++++++++++----
 1 file changed, 53 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
index ecd96eda7f6a..45e7e89928a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -23,9 +23,33 @@
             "$TC actions flush action skbedit"
         ]
     },
+    {
+        "id": "c8cf",
+        "name": "Add skbedit action with 32-bit maximum mark",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 4294967295 pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 1",
+        "matchPattern": "action order [0-9]*: skbedit  mark 4294967295.*pipe.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
     {
         "id": "407b",
-        "name": "Add skbedit action with invalid mark",
+        "name": "Add skbedit action with mark exceeding 32-bit maximum",
         "category": [
             "actions",
             "skbedit"
@@ -43,9 +67,7 @@
         "verifyCmd": "$TC actions list action skbedit",
         "matchPattern": "action order [0-9]*:  skbedit mark",
         "matchCount": "0",
-        "teardown": [
-            "$TC actions flush action skbedit"
-        ]
+        "teardown": []
     },
     {
         "id": "081d",
@@ -121,7 +143,7 @@
     },
     {
         "id": "985c",
-        "name": "Add skbedit action with invalid queue_mapping",
+        "name": "Add skbedit action with queue_mapping exceeding 16-bit maximum",
         "category": [
             "actions",
             "skbedit"
@@ -413,7 +435,7 @@
     },
     {
         "id": "a6d6",
-        "name": "Add skbedit action with index",
+        "name": "Add skbedit action with index at 32-bit maximum",
         "category": [
             "actions",
             "skbedit"
@@ -426,15 +448,37 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040",
+        "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4294967295",
         "expExitCode": "0",
-        "verifyCmd": "$TC actions list action skbedit",
-        "matchPattern": "index 4040404040",
+        "verifyCmd": "$TC actions get action skbedit index 4294967295",
+        "matchPattern": "action order [0-9]*: skbedit  mark 808.*index 4294967295",
         "matchCount": "1",
         "teardown": [
             "$TC actions flush action skbedit"
         ]
     },
+    {
+        "id": "f0f4",
+        "name": "Add skbedit action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 808 pass index 4294967297",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action skbedit index 4294967297",
+        "matchPattern": "action order [0-9]*:.*skbedit.*mark 808.*pass.*index 4294967297",
+        "matchCount": "0",
+        "teardown": []
+    },
     {
         "id": "38f3",
         "name": "Delete skbedit action",
-- 
cgit v1.2.3-59-g8ed1b


From f464100f576610aa4702dd9e168c80d6c661958a Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sun, 16 Jun 2019 13:15:01 -0400
Subject: selftests/net: fix warnings in TFO key rotation selftest

One warning each on signedness, unused variable and return type.

Fixes: 10fbcdd12aa2 ("selftests/net: add TFO key rotation selftest")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/tcp_fastopen_backup_key.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index 58bb77d9e7e1..9c55ec44fc43 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -51,7 +51,7 @@ static const int PORT = 8891;
 static void get_keys(int fd, uint32_t *keys)
 {
 	char buf[128];
-	int len = KEY_LENGTH * 2;
+	socklen_t len = KEY_LENGTH * 2;
 
 	if (do_sockopt) {
 		if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len))
@@ -210,14 +210,13 @@ static bool is_listen_fd(int fd)
 	return false;
 }
 
-static int rotate_key(int fd)
+static void rotate_key(int fd)
 {
 	static int iter;
 	static uint32_t new_key[4];
 	uint32_t keys[8];
 	uint32_t tmp_key[4];
 	int i;
-	int len = KEY_LENGTH * 2;
 
 	if (iter < N_LISTEN) {
 		/* first set new key as backups */
-- 
cgit v1.2.3-59-g8ed1b


From d7fe74f9404a9736e9d4f754c30e43640a822c17 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:50 -0700
Subject: libbpf: add common min/max macro to libbpf_internal.h

Multiple files in libbpf redefine their own definitions for min/max.
Let's define them in libbpf_internal.h and use those everywhere.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/bpf.c             | 7 ++-----
 tools/lib/bpf/bpf_prog_linfo.c  | 5 +----
 tools/lib/bpf/btf.c             | 3 ---
 tools/lib/bpf/btf_dump.c        | 3 ---
 tools/lib/bpf/libbpf_internal.h | 7 +++++++
 5 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 0d4b4fe10a84..c7d7993c44bb 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -26,10 +26,11 @@
 #include <memory.h>
 #include <unistd.h>
 #include <asm/unistd.h>
+#include <errno.h>
 #include <linux/bpf.h>
 #include "bpf.h"
 #include "libbpf.h"
-#include <errno.h>
+#include "libbpf_internal.h"
 
 /*
  * When building perf, unistd.h is overridden. __NR_bpf is
@@ -53,10 +54,6 @@
 # endif
 #endif
 
-#ifndef min
-#define min(x, y) ((x) < (y) ? (x) : (y))
-#endif
-
 static inline __u64 ptr_to_u64(const void *ptr)
 {
 	return (__u64) (unsigned long) ptr;
diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c
index 6978314ea7f6..8c67561c93b0 100644
--- a/tools/lib/bpf/bpf_prog_linfo.c
+++ b/tools/lib/bpf/bpf_prog_linfo.c
@@ -6,10 +6,7 @@
 #include <linux/err.h>
 #include <linux/bpf.h>
 #include "libbpf.h"
-
-#ifndef min
-#define min(x, y) ((x) < (y) ? (x) : (y))
-#endif
+#include "libbpf_internal.h"
 
 struct bpf_prog_linfo {
 	void *raw_linfo;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index b2478e98c367..467224feb43b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -16,9 +16,6 @@
 #include "libbpf_internal.h"
 #include "hashmap.h"
 
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 #define BTF_MAX_NR_TYPES 0x7fffffff
 #define BTF_MAX_STR_OFFSET 0x7fffffff
 
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 4b22db77e2cc..7065bb5b2752 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -18,9 +18,6 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 
-#define min(x, y) ((x) < (y) ? (x) : (y))
-#define max(x, y) ((x) < (y) ? (y) : (x))
-
 static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
 static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
 
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 850f7bdec5cb..554a7856dc2d 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -23,6 +23,13 @@
 #define BTF_PARAM_ENC(name, type) (name), (type)
 #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
 
+#ifndef min
+# define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
+#ifndef max
+# define max(x, y) ((x) < (y) ? (y) : (x))
+#endif
+
 extern void libbpf_print(enum libbpf_print_level level,
 			 const char *format, ...)
 	__attribute__((format(printf, 2, 3)));
-- 
cgit v1.2.3-59-g8ed1b


From 9c6660d0406f8c32d21957ca9853108513bea8b1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:51 -0700
Subject: libbpf: extract BTF loading logic

As a preparation for adding BTF-based BPF map loading, extract .BTF and
.BTF.ext loading logic.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 93 +++++++++++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 38 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e725fa86b189..49d3a808e754 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1078,6 +1078,58 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
 	}
 }
 
+static int bpf_object__load_btf(struct bpf_object *obj,
+				Elf_Data *btf_data,
+				Elf_Data *btf_ext_data)
+{
+	int err = 0;
+
+	if (btf_data) {
+		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+		if (IS_ERR(obj->btf)) {
+			pr_warning("Error loading ELF section %s: %d.\n",
+				   BTF_ELF_SEC, err);
+			goto out;
+		}
+		err = btf__finalize_data(obj, obj->btf);
+		if (err) {
+			pr_warning("Error finalizing %s: %d.\n",
+				   BTF_ELF_SEC, err);
+			goto out;
+		}
+		bpf_object__sanitize_btf(obj);
+		err = btf__load(obj->btf);
+		if (err) {
+			pr_warning("Error loading %s into kernel: %d.\n",
+				   BTF_ELF_SEC, err);
+			goto out;
+		}
+	}
+	if (btf_ext_data) {
+		if (!obj->btf) {
+			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
+				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
+			goto out;
+		}
+		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
+					    btf_ext_data->d_size);
+		if (IS_ERR(obj->btf_ext)) {
+			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
+				   BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+			obj->btf_ext = NULL;
+			goto out;
+		}
+		bpf_object__sanitize_btf_ext(obj);
+	}
+out:
+	if (err || IS_ERR(obj->btf)) {
+		if (!IS_ERR_OR_NULL(obj->btf))
+			btf__free(obj->btf);
+		obj->btf = NULL;
+	}
+	return 0;
+}
+
 static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 {
 	Elf *elf = obj->efile.elf;
@@ -1212,44 +1264,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 		pr_warning("Corrupted ELF file: index of strtab invalid\n");
 		return -LIBBPF_ERRNO__FORMAT;
 	}
-	if (btf_data) {
-		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
-		if (IS_ERR(obj->btf)) {
-			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
-				   BTF_ELF_SEC, PTR_ERR(obj->btf));
-			obj->btf = NULL;
-		} else {
-			err = btf__finalize_data(obj, obj->btf);
-			if (!err) {
-				bpf_object__sanitize_btf(obj);
-				err = btf__load(obj->btf);
-			}
-			if (err) {
-				pr_warning("Error finalizing and loading %s into kernel: %d. Ignored and continue.\n",
-					   BTF_ELF_SEC, err);
-				btf__free(obj->btf);
-				obj->btf = NULL;
-				err = 0;
-			}
-		}
-	}
-	if (btf_ext_data) {
-		if (!obj->btf) {
-			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
-				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
-		} else {
-			obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
-						    btf_ext_data->d_size);
-			if (IS_ERR(obj->btf_ext)) {
-				pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
-					   BTF_EXT_ELF_SEC,
-					   PTR_ERR(obj->btf_ext));
-				obj->btf_ext = NULL;
-			} else {
-				bpf_object__sanitize_btf_ext(obj);
-			}
-		}
-	}
+	err = bpf_object__load_btf(obj, btf_data, btf_ext_data);
+	if (err)
+		return err;
 	if (bpf_object__has_maps(obj)) {
 		err = bpf_object__init_maps(obj, flags);
 		if (err)
-- 
cgit v1.2.3-59-g8ed1b


From 01b29d1dc9a06e2cdc6a0172d35605eb0300ecb2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:52 -0700
Subject: libbpf: streamline ELF parsing error-handling

Simplify ELF parsing logic by exiting early, as there is no common clean
up path to execute. That makes it unnecessary to track when err was set
and when it was cleared. It also reduces nesting in some places.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 49d3a808e754..7ee44d8877c5 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1154,24 +1154,21 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 		if (gelf_getshdr(scn, &sh) != &sh) {
 			pr_warning("failed to get section(%d) header from %s\n",
 				   idx, obj->path);
-			err = -LIBBPF_ERRNO__FORMAT;
-			goto out;
+			return -LIBBPF_ERRNO__FORMAT;
 		}
 
 		name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
 		if (!name) {
 			pr_warning("failed to get section(%d) name from %s\n",
 				   idx, obj->path);
-			err = -LIBBPF_ERRNO__FORMAT;
-			goto out;
+			return -LIBBPF_ERRNO__FORMAT;
 		}
 
 		data = elf_getdata(scn, 0);
 		if (!data) {
 			pr_warning("failed to get section(%d) data from %s(%s)\n",
 				   idx, name, obj->path);
-			err = -LIBBPF_ERRNO__FORMAT;
-			goto out;
+			return -LIBBPF_ERRNO__FORMAT;
 		}
 		pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
 			 idx, name, (unsigned long)data->d_size,
@@ -1182,10 +1179,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			err = bpf_object__init_license(obj,
 						       data->d_buf,
 						       data->d_size);
+			if (err)
+				return err;
 		} else if (strcmp(name, "version") == 0) {
 			err = bpf_object__init_kversion(obj,
 							data->d_buf,
 							data->d_size);
+			if (err)
+				return err;
 		} else if (strcmp(name, "maps") == 0) {
 			obj->efile.maps_shndx = idx;
 		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
@@ -1196,11 +1197,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 			if (obj->efile.symbols) {
 				pr_warning("bpf: multiple SYMTAB in %s\n",
 					   obj->path);
-				err = -LIBBPF_ERRNO__FORMAT;
-			} else {
-				obj->efile.symbols = data;
-				obj->efile.strtabidx = sh.sh_link;
+				return -LIBBPF_ERRNO__FORMAT;
 			}
+			obj->efile.symbols = data;
+			obj->efile.strtabidx = sh.sh_link;
 		} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
 			if (sh.sh_flags & SHF_EXECINSTR) {
 				if (strcmp(name, ".text") == 0)
@@ -1214,6 +1214,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 
 					pr_warning("failed to alloc program %s (%s): %s",
 						   name, obj->path, cp);
+					return err;
 				}
 			} else if (strcmp(name, ".data") == 0) {
 				obj->efile.data = data;
@@ -1225,8 +1226,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 				pr_debug("skip section(%d) %s\n", idx, name);
 			}
 		} else if (sh.sh_type == SHT_REL) {
+			int nr_reloc = obj->efile.nr_reloc;
 			void *reloc = obj->efile.reloc;
-			int nr_reloc = obj->efile.nr_reloc + 1;
 			int sec = sh.sh_info; /* points to other section */
 
 			/* Only do relo for section with exec instructions */
@@ -1236,28 +1237,24 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 				continue;
 			}
 
-			reloc = reallocarray(reloc, nr_reloc,
+			reloc = reallocarray(reloc, nr_reloc + 1,
 					     sizeof(*obj->efile.reloc));
 			if (!reloc) {
 				pr_warning("realloc failed\n");
-				err = -ENOMEM;
-			} else {
-				int n = nr_reloc - 1;
+				return -ENOMEM;
+			}
 
-				obj->efile.reloc = reloc;
-				obj->efile.nr_reloc = nr_reloc;
+			obj->efile.reloc = reloc;
+			obj->efile.nr_reloc++;
 
-				obj->efile.reloc[n].shdr = sh;
-				obj->efile.reloc[n].data = data;
-			}
+			obj->efile.reloc[nr_reloc].shdr = sh;
+			obj->efile.reloc[nr_reloc].data = data;
 		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
 			obj->efile.bss = data;
 			obj->efile.bss_shndx = idx;
 		} else {
 			pr_debug("skip section(%d) %s\n", idx, name);
 		}
-		if (err)
-			goto out;
 	}
 
 	if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
@@ -1270,10 +1267,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 	if (bpf_object__has_maps(obj)) {
 		err = bpf_object__init_maps(obj, flags);
 		if (err)
-			goto out;
+			return err;
 	}
 	err = bpf_object__init_prog_names(obj);
-out:
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From bf82927125dd25003d76ed5541da704df21de57a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:53 -0700
Subject: libbpf: refactor map initialization

User and global data maps initialization has gotten pretty complicated
and unnecessarily convoluted. This patch splits out the logic for global
data map and user-defined map initialization. It also removes the
restriction of pre-calculating how many maps will be initialized,
instead allowing to keep adding new maps as they are discovered, which
will be used later for BTF-defined map definitions.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 247 ++++++++++++++++++++++++++-----------------------
 1 file changed, 133 insertions(+), 114 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7ee44d8877c5..88609dca4f7d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -234,6 +234,7 @@ struct bpf_object {
 	size_t nr_programs;
 	struct bpf_map *maps;
 	size_t nr_maps;
+	size_t maps_cap;
 	struct bpf_secdata sections;
 
 	bool loaded;
@@ -763,21 +764,51 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
 	return -ENOENT;
 }
 
-static bool bpf_object__has_maps(const struct bpf_object *obj)
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 {
-	return obj->efile.maps_shndx >= 0 ||
-	       obj->efile.data_shndx >= 0 ||
-	       obj->efile.rodata_shndx >= 0 ||
-	       obj->efile.bss_shndx >= 0;
+	struct bpf_map *new_maps;
+	size_t new_cap;
+	int i;
+
+	if (obj->nr_maps < obj->maps_cap)
+		return &obj->maps[obj->nr_maps++];
+
+	new_cap = max(4ul, obj->maps_cap * 3 / 2);
+	new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
+	if (!new_maps) {
+		pr_warning("alloc maps for object failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	obj->maps_cap = new_cap;
+	obj->maps = new_maps;
+
+	/* zero out new maps */
+	memset(obj->maps + obj->nr_maps, 0,
+	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
+	/*
+	 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
+	 * when failure (zclose won't close negative fd)).
+	 */
+	for (i = obj->nr_maps; i < obj->maps_cap; i++) {
+		obj->maps[i].fd = -1;
+		obj->maps[i].inner_map_fd = -1;
+	}
+
+	return &obj->maps[obj->nr_maps++];
 }
 
 static int
-bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
-			      enum libbpf_map_type type, Elf_Data *data,
-			      void **data_buff)
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
+			      Elf_Data *data, void **data_buff)
 {
-	struct bpf_map_def *def = &map->def;
 	char map_name[BPF_OBJ_NAME_LEN];
+	struct bpf_map_def *def;
+	struct bpf_map *map;
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
 	map->libbpf_type = type;
 	map->offset = ~(typeof(map->offset))0;
@@ -789,6 +820,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 		return -ENOMEM;
 	}
 
+	def = &map->def;
 	def->type = BPF_MAP_TYPE_ARRAY;
 	def->key_size = sizeof(int);
 	def->value_size = data->d_size;
@@ -808,29 +840,58 @@ bpf_object__init_internal_map(struct bpf_object *obj, struct bpf_map *map,
 	return 0;
 }
 
-static int bpf_object__init_maps(struct bpf_object *obj, int flags)
+static int bpf_object__init_global_data_maps(struct bpf_object *obj)
+{
+	int err;
+
+	if (!obj->caps.global_data)
+		return 0;
+	/*
+	 * Populate obj->maps with libbpf internal maps.
+	 */
+	if (obj->efile.data_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
+						    obj->efile.data,
+						    &obj->sections.data);
+		if (err)
+			return err;
+	}
+	if (obj->efile.rodata_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
+						    obj->efile.rodata,
+						    &obj->sections.rodata);
+		if (err)
+			return err;
+	}
+	if (obj->efile.bss_shndx >= 0) {
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
+						    obj->efile.bss, NULL);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 {
-	int i, map_idx, map_def_sz = 0, nr_syms, nr_maps = 0, nr_maps_glob = 0;
-	bool strict = !(flags & MAPS_RELAX_COMPAT);
 	Elf_Data *symbols = obj->efile.symbols;
+	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
 	Elf_Data *data = NULL;
-	int ret = 0;
+	Elf_Scn *scn;
+
+	if (obj->efile.maps_shndx < 0)
+		return 0;
 
 	if (!symbols)
 		return -EINVAL;
-	nr_syms = symbols->d_size / sizeof(GElf_Sym);
-
-	if (obj->efile.maps_shndx >= 0) {
-		Elf_Scn *scn = elf_getscn(obj->efile.elf,
-					  obj->efile.maps_shndx);
 
-		if (scn)
-			data = elf_getdata(scn, NULL);
-		if (!scn || !data) {
-			pr_warning("failed to get Elf_Data from map section %d\n",
-				   obj->efile.maps_shndx);
-			return -EINVAL;
-		}
+	scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
+	if (scn)
+		data = elf_getdata(scn, NULL);
+	if (!scn || !data) {
+		pr_warning("failed to get Elf_Data from map section %d\n",
+			   obj->efile.maps_shndx);
+		return -EINVAL;
 	}
 
 	/*
@@ -840,16 +901,8 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
 	 *
 	 * TODO: Detect array of map and report error.
 	 */
-	if (obj->caps.global_data) {
-		if (obj->efile.data_shndx >= 0)
-			nr_maps_glob++;
-		if (obj->efile.rodata_shndx >= 0)
-			nr_maps_glob++;
-		if (obj->efile.bss_shndx >= 0)
-			nr_maps_glob++;
-	}
-
-	for (i = 0; data && i < nr_syms; i++) {
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
+	for (i = 0; i < nr_syms; i++) {
 		GElf_Sym sym;
 
 		if (!gelf_getsym(symbols, i, &sym))
@@ -858,79 +911,56 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
 			continue;
 		nr_maps++;
 	}
-
-	if (!nr_maps && !nr_maps_glob)
-		return 0;
-
 	/* Assume equally sized map definitions */
-	if (data) {
-		pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
-			 nr_maps, data->d_size);
-
-		map_def_sz = data->d_size / nr_maps;
-		if (!data->d_size || (data->d_size % nr_maps) != 0) {
-			pr_warning("unable to determine map definition size "
-				   "section %s, %d maps in %zd bytes\n",
-				   obj->path, nr_maps, data->d_size);
-			return -EINVAL;
-		}
-	}
-
-	nr_maps += nr_maps_glob;
-	obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
-	if (!obj->maps) {
-		pr_warning("alloc maps for object failed\n");
-		return -ENOMEM;
-	}
-	obj->nr_maps = nr_maps;
-
-	for (i = 0; i < nr_maps; i++) {
-		/*
-		 * fill all fd with -1 so won't close incorrect
-		 * fd (fd=0 is stdin) when failure (zclose won't close
-		 * negative fd)).
-		 */
-		obj->maps[i].fd = -1;
-		obj->maps[i].inner_map_fd = -1;
+	pr_debug("maps in %s: %d maps in %zd bytes\n",
+		 obj->path, nr_maps, data->d_size);
+
+	map_def_sz = data->d_size / nr_maps;
+	if (!data->d_size || (data->d_size % nr_maps) != 0) {
+		pr_warning("unable to determine map definition size "
+			   "section %s, %d maps in %zd bytes\n",
+			   obj->path, nr_maps, data->d_size);
+		return -EINVAL;
 	}
 
-	/*
-	 * Fill obj->maps using data in "maps" section.
-	 */
-	for (i = 0, map_idx = 0; data && i < nr_syms; i++) {
+	/* Fill obj->maps using data in "maps" section.  */
+	for (i = 0; i < nr_syms; i++) {
 		GElf_Sym sym;
 		const char *map_name;
 		struct bpf_map_def *def;
+		struct bpf_map *map;
 
 		if (!gelf_getsym(symbols, i, &sym))
 			continue;
 		if (sym.st_shndx != obj->efile.maps_shndx)
 			continue;
 
-		map_name = elf_strptr(obj->efile.elf,
-				      obj->efile.strtabidx,
+		map = bpf_object__add_map(obj);
+		if (IS_ERR(map))
+			return PTR_ERR(map);
+
+		map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
 				      sym.st_name);
 		if (!map_name) {
 			pr_warning("failed to get map #%d name sym string for obj %s\n",
-				   map_idx, obj->path);
+				   i, obj->path);
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
-		obj->maps[map_idx].libbpf_type = LIBBPF_MAP_UNSPEC;
-		obj->maps[map_idx].offset = sym.st_value;
+		map->libbpf_type = LIBBPF_MAP_UNSPEC;
+		map->offset = sym.st_value;
 		if (sym.st_value + map_def_sz > data->d_size) {
 			pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
 				   obj->path, map_name);
 			return -EINVAL;
 		}
 
-		obj->maps[map_idx].name = strdup(map_name);
-		if (!obj->maps[map_idx].name) {
+		map->name = strdup(map_name);
+		if (!map->name) {
 			pr_warning("failed to alloc map name\n");
 			return -ENOMEM;
 		}
-		pr_debug("map %d is \"%s\"\n", map_idx,
-			 obj->maps[map_idx].name);
+		pr_debug("map %d is \"%s\"\n", i, map->name);
 		def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
 		/*
 		 * If the definition of the map in the object file fits in
@@ -939,7 +969,7 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
 		 * calloc above.
 		 */
 		if (map_def_sz <= sizeof(struct bpf_map_def)) {
-			memcpy(&obj->maps[map_idx].def, def, map_def_sz);
+			memcpy(&map->def, def, map_def_sz);
 		} else {
 			/*
 			 * Here the map structure being read is bigger than what
@@ -959,37 +989,30 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
 						return -EINVAL;
 				}
 			}
-			memcpy(&obj->maps[map_idx].def, def,
-			       sizeof(struct bpf_map_def));
+			memcpy(&map->def, def, sizeof(struct bpf_map_def));
 		}
-		map_idx++;
 	}
+	return 0;
+}
 
-	if (!obj->caps.global_data)
-		goto finalize;
+static int bpf_object__init_maps(struct bpf_object *obj, int flags)
+{
+	bool strict = !(flags & MAPS_RELAX_COMPAT);
+	int err;
 
-	/*
-	 * Populate rest of obj->maps with libbpf internal maps.
-	 */
-	if (obj->efile.data_shndx >= 0)
-		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
-						    LIBBPF_MAP_DATA,
-						    obj->efile.data,
-						    &obj->sections.data);
-	if (!ret && obj->efile.rodata_shndx >= 0)
-		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
-						    LIBBPF_MAP_RODATA,
-						    obj->efile.rodata,
-						    &obj->sections.rodata);
-	if (!ret && obj->efile.bss_shndx >= 0)
-		ret = bpf_object__init_internal_map(obj, &obj->maps[map_idx++],
-						    LIBBPF_MAP_BSS,
-						    obj->efile.bss, NULL);
-finalize:
-	if (!ret)
+	err = bpf_object__init_user_maps(obj, strict);
+	if (err)
+		return err;
+
+	err = bpf_object__init_global_data_maps(obj);
+	if (err)
+		return err;
+
+	if (obj->nr_maps) {
 		qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
 		      compare_bpf_map);
-	return ret;
+	}
+	return 0;
 }
 
 static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -1262,14 +1285,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 		return -LIBBPF_ERRNO__FORMAT;
 	}
 	err = bpf_object__load_btf(obj, btf_data, btf_ext_data);
-	if (err)
-		return err;
-	if (bpf_object__has_maps(obj)) {
+	if (!err)
 		err = bpf_object__init_maps(obj, flags);
-		if (err)
-			return err;
-	}
-	err = bpf_object__init_prog_names(obj);
+	if (!err)
+		err = bpf_object__init_prog_names(obj);
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From db48814bd2833ee231f48bd2030082369b88f9ef Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:54 -0700
Subject: libbpf: identify maps by section index in addition to offset

To support maps to be defined in multiple sections, it's important to
identify map not just by offset within its section, but section index as
well. This patch adds tracking of section index.

For global data, we record section index of corresponding
.data/.bss/.rodata ELF section for uniformity, and thus don't need
a special value of offset for those maps.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 88609dca4f7d..b1f3ab4b39b3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -207,7 +207,8 @@ static const char * const libbpf_type_to_btf_name[] = {
 struct bpf_map {
 	int fd;
 	char *name;
-	size_t offset;
+	int sec_idx;
+	size_t sec_offset;
 	int map_ifindex;
 	int inner_map_fd;
 	struct bpf_map_def def;
@@ -647,7 +648,9 @@ static int compare_bpf_map(const void *_a, const void *_b)
 	const struct bpf_map *a = _a;
 	const struct bpf_map *b = _b;
 
-	return a->offset - b->offset;
+	if (a->sec_idx != b->sec_idx)
+		return a->sec_idx - b->sec_idx;
+	return a->sec_offset - b->sec_offset;
 }
 
 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
@@ -800,7 +803,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 
 static int
 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
-			      Elf_Data *data, void **data_buff)
+			      int sec_idx, Elf_Data *data, void **data_buff)
 {
 	char map_name[BPF_OBJ_NAME_LEN];
 	struct bpf_map_def *def;
@@ -811,7 +814,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 		return PTR_ERR(map);
 
 	map->libbpf_type = type;
-	map->offset = ~(typeof(map->offset))0;
+	map->sec_idx = sec_idx;
+	map->sec_offset = 0;
 	snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
 		 libbpf_type_to_btf_name[type]);
 	map->name = strdup(map_name);
@@ -819,6 +823,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 		pr_warning("failed to alloc map name\n");
 		return -ENOMEM;
 	}
+	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n",
+		 map_name, map->sec_idx, map->sec_offset);
 
 	def = &map->def;
 	def->type = BPF_MAP_TYPE_ARRAY;
@@ -851,6 +857,7 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 	 */
 	if (obj->efile.data_shndx >= 0) {
 		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
+						    obj->efile.data_shndx,
 						    obj->efile.data,
 						    &obj->sections.data);
 		if (err)
@@ -858,6 +865,7 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 	}
 	if (obj->efile.rodata_shndx >= 0) {
 		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
+						    obj->efile.rodata_shndx,
 						    obj->efile.rodata,
 						    &obj->sections.rodata);
 		if (err)
@@ -865,6 +873,7 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 	}
 	if (obj->efile.bss_shndx >= 0) {
 		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
+						    obj->efile.bss_shndx,
 						    obj->efile.bss, NULL);
 		if (err)
 			return err;
@@ -948,7 +957,10 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 		}
 
 		map->libbpf_type = LIBBPF_MAP_UNSPEC;
-		map->offset = sym.st_value;
+		map->sec_idx = sym.st_shndx;
+		map->sec_offset = sym.st_value;
+		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
+			 map_name, map->sec_idx, map->sec_offset);
 		if (sym.st_value + map_def_sz > data->d_size) {
 			pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
 				   obj->path, map_name);
@@ -1448,9 +1460,13 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 				if (maps[map_idx].libbpf_type != type)
 					continue;
 				if (type != LIBBPF_MAP_UNSPEC ||
-				    maps[map_idx].offset == sym.st_value) {
-					pr_debug("relocation: find map %zd (%s) for insn %u\n",
-						 map_idx, maps[map_idx].name, insn_idx);
+				    (maps[map_idx].sec_idx == sym.st_shndx &&
+				     maps[map_idx].sec_offset == sym.st_value)) {
+					pr_debug("relocation: found map %zd (%s, sec_idx %d, offset %zu) for insn %u\n",
+						 map_idx, maps[map_idx].name,
+						 maps[map_idx].sec_idx,
+						 maps[map_idx].sec_offset,
+						 insn_idx);
 					break;
 				}
 			}
@@ -3468,13 +3484,7 @@ bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name)
 struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 {
-	int i;
-
-	for (i = 0; i < obj->nr_maps; i++) {
-		if (obj->maps[i].offset == offset)
-			return &obj->maps[i];
-	}
-	return ERR_PTR(-ENOENT);
+	return ERR_PTR(-ENOTSUP);
 }
 
 long libbpf_get_error(const void *ptr)
-- 
cgit v1.2.3-59-g8ed1b


From 063183bf0486c7e61a2b454896bba2f7908b4ab0 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:55 -0700
Subject: libbpf: split initialization and loading of BTF

Libbpf does sanitization of BTF before loading it into kernel, if kernel
doesn't support some of newer BTF features. This removes some of the
important information from BTF (e.g., DATASEC and VAR description),
which will be used for map construction. This patch splits BTF
processing into initialization step, in which BTF is initialized from
ELF and all the original data is still preserved; and
sanitization/loading step, which ensures that BTF is safe to load into
kernel. This allows to use full BTF information to construct maps, while
still loading valid BTF into older kernels.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b1f3ab4b39b3..da942ab2f06a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1113,7 +1113,7 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
 	}
 }
 
-static int bpf_object__load_btf(struct bpf_object *obj,
+static int bpf_object__init_btf(struct bpf_object *obj,
 				Elf_Data *btf_data,
 				Elf_Data *btf_ext_data)
 {
@@ -1132,13 +1132,6 @@ static int bpf_object__load_btf(struct bpf_object *obj,
 				   BTF_ELF_SEC, err);
 			goto out;
 		}
-		bpf_object__sanitize_btf(obj);
-		err = btf__load(obj->btf);
-		if (err) {
-			pr_warning("Error loading %s into kernel: %d.\n",
-				   BTF_ELF_SEC, err);
-			goto out;
-		}
 	}
 	if (btf_ext_data) {
 		if (!obj->btf) {
@@ -1154,7 +1147,6 @@ static int bpf_object__load_btf(struct bpf_object *obj,
 			obj->btf_ext = NULL;
 			goto out;
 		}
-		bpf_object__sanitize_btf_ext(obj);
 	}
 out:
 	if (err || IS_ERR(obj->btf)) {
@@ -1165,6 +1157,26 @@ out:
 	return 0;
 }
 
+static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
+{
+	int err = 0;
+
+	if (!obj->btf)
+		return 0;
+
+	bpf_object__sanitize_btf(obj);
+	bpf_object__sanitize_btf_ext(obj);
+
+	err = btf__load(obj->btf);
+	if (err) {
+		pr_warning("Error loading %s into kernel: %d.\n",
+			   BTF_ELF_SEC, err);
+		btf__free(obj->btf);
+		obj->btf = NULL;
+	}
+	return 0;
+}
+
 static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 {
 	Elf *elf = obj->efile.elf;
@@ -1296,9 +1308,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 		pr_warning("Corrupted ELF file: index of strtab invalid\n");
 		return -LIBBPF_ERRNO__FORMAT;
 	}
-	err = bpf_object__load_btf(obj, btf_data, btf_ext_data);
+	err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
 	if (!err)
 		err = bpf_object__init_maps(obj, flags);
+	if (!err)
+		err = bpf_object__sanitize_and_load_btf(obj);
 	if (!err)
 		err = bpf_object__init_prog_names(obj);
 	return err;
-- 
cgit v1.2.3-59-g8ed1b


From abd29c9314595b1ee5ec6c61d7c49a497ffb30a3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:56 -0700
Subject: libbpf: allow specifying map definitions using BTF

This patch adds support for a new way to define BPF maps. It relies on
BTF to describe mandatory and optional attributes of a map, as well as
captures type information of key and value naturally. This eliminates
the need for BPF_ANNOTATE_KV_PAIR hack and ensures key/value sizes are
always in sync with the key/value type.

Relying on BTF, this approach allows for both forward and backward
compatibility w.r.t. extending supported map definition features. By
default, any unrecognized attributes are treated as an error, but it's
possible relax this using MAPS_RELAX_COMPAT flag. New attributes, added
in the future will need to be optional.

The outline of the new map definition (short, BTF-defined maps) is as follows:
1. All the maps should be defined in .maps ELF section. It's possible to
   have both "legacy" map definitions in `maps` sections and BTF-defined
   maps in .maps sections. Everything will still work transparently.
2. The map declaration and initialization is done through
   a global/static variable of a struct type with few mandatory and
   extra optional fields:
   - type field is mandatory and specified type of BPF map;
   - key/value fields are mandatory and capture key/value type/size information;
   - max_entries attribute is optional; if max_entries is not specified or
     initialized, it has to be provided in runtime through libbpf API
     before loading bpf_object;
   - map_flags is optional and if not defined, will be assumed to be 0.
3. Key/value fields should be **a pointer** to a type describing
   key/value. The pointee type is assumed (and will be recorded as such
   and used for size determination) to be a type describing key/value of
   the map. This is done to save excessive amounts of space allocated in
   corresponding ELF sections for key/value of big size.
4. As some maps disallow having BTF type ID associated with key/value,
   it's possible to specify key/value size explicitly without
   associating BTF type ID with it. Use key_size and value_size fields
   to do that (see example below).

Here's an example of simple ARRAY map defintion:

struct my_value { int x, y, z; };

struct {
	int type;
	int max_entries;
	int *key;
	struct my_value *value;
} btf_map SEC(".maps") = {
	.type = BPF_MAP_TYPE_ARRAY,
	.max_entries = 16,
};

This will define BPF ARRAY map 'btf_map' with 16 elements. The key will
be of type int and thus key size will be 4 bytes. The value is struct
my_value of size 12 bytes. This map can be used from C code exactly the
same as with existing maps defined through struct bpf_map_def.

Here's an example of STACKMAP definition (which currently disallows BTF type
IDs for key/value):

struct {
	__u32 type;
	__u32 max_entries;
	__u32 map_flags;
	__u32 key_size;
	__u32 value_size;
} stackmap SEC(".maps") = {
	.type = BPF_MAP_TYPE_STACK_TRACE,
	.max_entries = 128,
	.map_flags = BPF_F_STACK_BUILD_ID,
	.key_size = sizeof(__u32),
	.value_size = PERF_MAX_STACK_DEPTH * sizeof(struct bpf_stack_build_id),
};

This approach is naturally extended to support map-in-map, by making a value
field to be another struct that describes inner map. This feature is not
implemented yet. It's also possible to incrementally add features like pinning
with full backwards and forward compatibility. Support for static
initialization of BPF_MAP_TYPE_PROG_ARRAY using pointers to BPF programs
is also on the roadmap.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/btf.h    |   1 +
 tools/lib/bpf/libbpf.c | 353 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 345 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index ba4ffa831aa4..88a52ae56fc6 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -17,6 +17,7 @@ extern "C" {
 
 #define BTF_ELF_SEC ".BTF"
 #define BTF_EXT_ELF_SEC ".BTF.ext"
+#define MAPS_ELF_SEC ".maps"
 
 struct btf;
 struct btf_ext;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index da942ab2f06a..585e3a2f1eb4 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -262,6 +262,7 @@ struct bpf_object {
 		} *reloc;
 		int nr_reloc;
 		int maps_shndx;
+		int btf_maps_shndx;
 		int text_shndx;
 		int data_shndx;
 		int rodata_shndx;
@@ -514,6 +515,7 @@ static struct bpf_object *bpf_object__new(const char *path,
 	obj->efile.obj_buf = obj_buf;
 	obj->efile.obj_buf_sz = obj_buf_sz;
 	obj->efile.maps_shndx = -1;
+	obj->efile.btf_maps_shndx = -1;
 	obj->efile.data_shndx = -1;
 	obj->efile.rodata_shndx = -1;
 	obj->efile.bss_shndx = -1;
@@ -1007,6 +1009,312 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 	return 0;
 }
 
+static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
+						     __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+
+	while (true) {
+		switch (BTF_INFO_KIND(t->info)) {
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_TYPEDEF:
+			t = btf__type_by_id(btf, t->type);
+			break;
+		default:
+			return t;
+		}
+	}
+}
+
+static bool get_map_field_int(const char *map_name,
+			      const struct btf *btf,
+			      const struct btf_type *def,
+			      const struct btf_member *m,
+			      const void *data, __u32 *res) {
+	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
+	const char *name = btf__name_by_offset(btf, m->name_off);
+	__u32 int_info = *(const __u32 *)(const void *)(t + 1);
+
+	if (BTF_INFO_KIND(t->info) != BTF_KIND_INT) {
+		pr_warning("map '%s': attr '%s': expected INT, got %u.\n",
+			   map_name, name, BTF_INFO_KIND(t->info));
+		return false;
+	}
+	if (t->size != 4 || BTF_INT_BITS(int_info) != 32 ||
+	    BTF_INT_OFFSET(int_info)) {
+		pr_warning("map '%s': attr '%s': expected 32-bit non-bitfield integer, "
+			   "got %u-byte (%d-bit) one with bit offset %d.\n",
+			   map_name, name, t->size, BTF_INT_BITS(int_info),
+			   BTF_INT_OFFSET(int_info));
+		return false;
+	}
+	if (BTF_INFO_KFLAG(def->info) && BTF_MEMBER_BITFIELD_SIZE(m->offset)) {
+		pr_warning("map '%s': attr '%s': bitfield is not supported.\n",
+			   map_name, name);
+		return false;
+	}
+	if (m->offset % 32) {
+		pr_warning("map '%s': attr '%s': unaligned fields are not supported.\n",
+			   map_name, name);
+		return false;
+	}
+
+	*res = *(const __u32 *)(data + m->offset / 8);
+	return true;
+}
+
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+					 const struct btf_type *sec,
+					 int var_idx, int sec_idx,
+					 const Elf_Data *data, bool strict)
+{
+	const struct btf_type *var, *def, *t;
+	const struct btf_var_secinfo *vi;
+	const struct btf_var *var_extra;
+	const struct btf_member *m;
+	const void *def_data;
+	const char *map_name;
+	struct bpf_map *map;
+	int vlen, i;
+
+	vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx;
+	var = btf__type_by_id(obj->btf, vi->type);
+	var_extra = (const void *)(var + 1);
+	map_name = btf__name_by_offset(obj->btf, var->name_off);
+	vlen = BTF_INFO_VLEN(var->info);
+
+	if (map_name == NULL || map_name[0] == '\0') {
+		pr_warning("map #%d: empty name.\n", var_idx);
+		return -EINVAL;
+	}
+	if ((__u64)vi->offset + vi->size > data->d_size) {
+		pr_warning("map '%s' BTF data is corrupted.\n", map_name);
+		return -EINVAL;
+	}
+	if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) {
+		pr_warning("map '%s': unexpected var kind %u.\n",
+			   map_name, BTF_INFO_KIND(var->info));
+		return -EINVAL;
+	}
+	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+	    var_extra->linkage != BTF_VAR_STATIC) {
+		pr_warning("map '%s': unsupported var linkage %u.\n",
+			   map_name, var_extra->linkage);
+		return -EOPNOTSUPP;
+	}
+
+	def = skip_mods_and_typedefs(obj->btf, var->type);
+	if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) {
+		pr_warning("map '%s': unexpected def kind %u.\n",
+			   map_name, BTF_INFO_KIND(var->info));
+		return -EINVAL;
+	}
+	if (def->size > vi->size) {
+		pr_warning("map '%s': invalid def size.\n", map_name);
+		return -EINVAL;
+	}
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	map->name = strdup(map_name);
+	if (!map->name) {
+		pr_warning("map '%s': failed to alloc map name.\n", map_name);
+		return -ENOMEM;
+	}
+	map->libbpf_type = LIBBPF_MAP_UNSPEC;
+	map->def.type = BPF_MAP_TYPE_UNSPEC;
+	map->sec_idx = sec_idx;
+	map->sec_offset = vi->offset;
+	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+		 map_name, map->sec_idx, map->sec_offset);
+
+	def_data = data->d_buf + vi->offset;
+	vlen = BTF_INFO_VLEN(def->info);
+	m = (const void *)(def + 1);
+	for (i = 0; i < vlen; i++, m++) {
+		const char *name = btf__name_by_offset(obj->btf, m->name_off);
+
+		if (!name) {
+			pr_warning("map '%s': invalid field #%d.\n",
+				   map_name, i);
+			return -EINVAL;
+		}
+		if (strcmp(name, "type") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       def_data, &map->def.type))
+				return -EINVAL;
+			pr_debug("map '%s': found type = %u.\n",
+				 map_name, map->def.type);
+		} else if (strcmp(name, "max_entries") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       def_data, &map->def.max_entries))
+				return -EINVAL;
+			pr_debug("map '%s': found max_entries = %u.\n",
+				 map_name, map->def.max_entries);
+		} else if (strcmp(name, "map_flags") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       def_data, &map->def.map_flags))
+				return -EINVAL;
+			pr_debug("map '%s': found map_flags = %u.\n",
+				 map_name, map->def.map_flags);
+		} else if (strcmp(name, "key_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       def_data, &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found key_size = %u.\n",
+				 map_name, sz);
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warning("map '%s': conflictling key size %u != %u.\n",
+					   map_name, map->def.key_size, sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+		} else if (strcmp(name, "key") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warning("map '%s': key type [%d] not found.\n",
+					   map_name, m->type);
+				return -EINVAL;
+			}
+			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+				pr_warning("map '%s': key spec is not PTR: %u.\n",
+					   map_name, BTF_INFO_KIND(t->info));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n",
+					   map_name, t->type, sz);
+				return sz;
+			}
+			pr_debug("map '%s': found key [%u], sz = %lld.\n",
+				 map_name, t->type, sz);
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warning("map '%s': conflictling key size %u != %lld.\n",
+					   map_name, map->def.key_size, sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+			map->btf_key_type_id = t->type;
+		} else if (strcmp(name, "value_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       def_data, &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found value_size = %u.\n",
+				 map_name, sz);
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warning("map '%s': conflictling value size %u != %u.\n",
+					   map_name, map->def.value_size, sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+		} else if (strcmp(name, "value") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warning("map '%s': value type [%d] not found.\n",
+					   map_name, m->type);
+				return -EINVAL;
+			}
+			if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+				pr_warning("map '%s': value spec is not PTR: %u.\n",
+					   map_name, BTF_INFO_KIND(t->info));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n",
+					   map_name, t->type, sz);
+				return sz;
+			}
+			pr_debug("map '%s': found value [%u], sz = %lld.\n",
+				 map_name, t->type, sz);
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warning("map '%s': conflictling value size %u != %lld.\n",
+					   map_name, map->def.value_size, sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+			map->btf_value_type_id = t->type;
+		} else {
+			if (strict) {
+				pr_warning("map '%s': unknown field '%s'.\n",
+					   map_name, name);
+				return -ENOTSUP;
+			}
+			pr_debug("map '%s': ignoring unknown field '%s'.\n",
+				 map_name, name);
+		}
+	}
+
+	if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
+		pr_warning("map '%s': map type isn't specified.\n", map_name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
+{
+	const struct btf_type *sec = NULL;
+	int nr_types, i, vlen, err;
+	const struct btf_type *t;
+	const char *name;
+	Elf_Data *data;
+	Elf_Scn *scn;
+
+	if (obj->efile.btf_maps_shndx < 0)
+		return 0;
+
+	scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
+	if (scn)
+		data = elf_getdata(scn, NULL);
+	if (!scn || !data) {
+		pr_warning("failed to get Elf_Data from map section %d (%s)\n",
+			   obj->efile.maps_shndx, MAPS_ELF_SEC);
+		return -EINVAL;
+	}
+
+	nr_types = btf__get_nr_types(obj->btf);
+	for (i = 1; i <= nr_types; i++) {
+		t = btf__type_by_id(obj->btf, i);
+		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
+			continue;
+		name = btf__name_by_offset(obj->btf, t->name_off);
+		if (strcmp(name, MAPS_ELF_SEC) == 0) {
+			sec = t;
+			break;
+		}
+	}
+
+	if (!sec) {
+		pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
+		return -ENOENT;
+	}
+
+	vlen = BTF_INFO_VLEN(sec->info);
+	for (i = 0; i < vlen; i++) {
+		err = bpf_object__init_user_btf_map(obj, sec, i,
+						    obj->efile.btf_maps_shndx,
+						    data, strict);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int bpf_object__init_maps(struct bpf_object *obj, int flags)
 {
 	bool strict = !(flags & MAPS_RELAX_COMPAT);
@@ -1016,6 +1324,10 @@ static int bpf_object__init_maps(struct bpf_object *obj, int flags)
 	if (err)
 		return err;
 
+	err = bpf_object__init_user_btf_maps(obj, strict);
+	if (err)
+		return err;
+
 	err = bpf_object__init_global_data_maps(obj);
 	if (err)
 		return err;
@@ -1113,10 +1425,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
 	}
 }
 
+static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
+{
+	return obj->efile.btf_maps_shndx >= 0;
+}
+
 static int bpf_object__init_btf(struct bpf_object *obj,
 				Elf_Data *btf_data,
 				Elf_Data *btf_ext_data)
 {
+	bool btf_required = bpf_object__is_btf_mandatory(obj);
 	int err = 0;
 
 	if (btf_data) {
@@ -1150,10 +1468,18 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 	}
 out:
 	if (err || IS_ERR(obj->btf)) {
+		if (btf_required)
+			err = err ? : PTR_ERR(obj->btf);
+		else
+			err = 0;
 		if (!IS_ERR_OR_NULL(obj->btf))
 			btf__free(obj->btf);
 		obj->btf = NULL;
 	}
+	if (btf_required && !obj->btf) {
+		pr_warning("BTF is required, but is missing or corrupted.\n");
+		return err == 0 ? -ENOENT : err;
+	}
 	return 0;
 }
 
@@ -1173,6 +1499,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 			   BTF_ELF_SEC, err);
 		btf__free(obj->btf);
 		obj->btf = NULL;
+		if (bpf_object__is_btf_mandatory(obj))
+			return err;
 	}
 	return 0;
 }
@@ -1236,6 +1564,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
 				return err;
 		} else if (strcmp(name, "maps") == 0) {
 			obj->efile.maps_shndx = idx;
+		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
+			obj->efile.btf_maps_shndx = idx;
 		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
 			btf_data = data;
 		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
@@ -1355,7 +1685,8 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
 				      int shndx)
 {
-	return shndx == obj->efile.maps_shndx;
+	return shndx == obj->efile.maps_shndx ||
+	       shndx == obj->efile.btf_maps_shndx;
 }
 
 static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
@@ -1399,14 +1730,14 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	prog->nr_reloc = nrels;
 
 	for (i = 0; i < nrels; i++) {
-		GElf_Sym sym;
-		GElf_Rel rel;
-		unsigned int insn_idx;
-		unsigned int shdr_idx;
 		struct bpf_insn *insns = prog->insns;
 		enum libbpf_map_type type;
+		unsigned int insn_idx;
+		unsigned int shdr_idx;
 		const char *name;
 		size_t map_idx;
+		GElf_Sym sym;
+		GElf_Rel rel;
 
 		if (!gelf_getrel(data, i, &rel)) {
 			pr_warning("relocation: failed to get %d reloc\n", i);
@@ -1500,14 +1831,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
 	return 0;
 }
 
-static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 {
 	struct bpf_map_def *def = &map->def;
 	__u32 key_type_id = 0, value_type_id = 0;
 	int ret;
 
+	/* if it's BTF-defined map, we don't need to search for type IDs */
+	if (map->sec_idx == obj->efile.btf_maps_shndx)
+		return 0;
+
 	if (!bpf_map__is_internal(map)) {
-		ret = btf__get_map_kv_tids(btf, map->name, def->key_size,
+		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
 					   def->value_size, &key_type_id,
 					   &value_type_id);
 	} else {
@@ -1515,7 +1850,7 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
 		 * LLVM annotates global data differently in BTF, that is,
 		 * only as '.data', '.bss' or '.rodata'.
 		 */
-		ret = btf__find_by_name(btf,
+		ret = btf__find_by_name(obj->btf,
 				libbpf_type_to_btf_name[map->libbpf_type]);
 	}
 	if (ret < 0)
@@ -1805,7 +2140,7 @@ bpf_object__create_maps(struct bpf_object *obj)
 		    map->inner_map_fd >= 0)
 			create_attr.inner_map_fd = map->inner_map_fd;
 
-		if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+		if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
 			create_attr.btf_fd = btf__fd(obj->btf);
 			create_attr.btf_key_type_id = map->btf_key_type_id;
 			create_attr.btf_value_type_id = map->btf_value_type_id;
-- 
cgit v1.2.3-59-g8ed1b


From 9e3d709c47ca3d10df461635b350edef94f3cf11 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:57 -0700
Subject: selftests/bpf: add test for BTF-defined maps

Add file test for BTF-defined map definition.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/test_btf_newkv.c | 73 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_btf.c             | 10 +--
 2 files changed, 76 insertions(+), 7 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_btf_newkv.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
new file mode 100644
index 000000000000..28c16bb583b6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+/* just to validate we can handle maps in multiple sections */
+struct bpf_map_def SEC("maps") btf_map_legacy = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(long long),
+	.max_entries = 4,
+};
+
+BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
+
+struct {
+	int *key;
+	struct ipv_counts *value;
+	unsigned int type;
+	unsigned int max_entries;
+} btf_map SEC(".maps") = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	/* just verify we can reference both maps */
+	counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
+	if (!counts)
+		return 0;
+
+	return 0;
+}
+
+__attribute__((noinline))
+static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_2(arg);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	return test_long_fname_1(arg);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 289daf54dec4..8351cb5f4a20 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -4016,13 +4016,9 @@ struct btf_file_test {
 };
 
 static struct btf_file_test file_tests[] = {
-{
-	.file = "test_btf_haskv.o",
-},
-{
-	.file = "test_btf_nokv.o",
-	.btf_kv_notfound = true,
-},
+	{ .file = "test_btf_haskv.o", },
+	{ .file = "test_btf_newkv.o", },
+	{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
 };
 
 static int do_test_file(unsigned int test_num)
-- 
cgit v1.2.3-59-g8ed1b


From f654407481e9bbba7039e939163ef60f9d5e43ba Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:58 -0700
Subject: selftests/bpf: switch BPF_ANNOTATE_KV_PAIR tests to BTF-defined maps

Switch tests that already rely on BTF to BTF-defined map definitions.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/netcnt_prog.c    | 22 ++++----
 tools/testing/selftests/bpf/progs/test_map_lock.c  | 22 ++++----
 .../selftests/bpf/progs/test_send_signal_kern.c    | 22 ++++----
 .../selftests/bpf/progs/test_sock_fields_kern.c    | 60 +++++++++++++---------
 tools/testing/selftests/bpf/progs/test_spin_lock.c | 33 ++++++------
 5 files changed, 87 insertions(+), 72 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index 9f741e69cebe..a25c82a5b7c8 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -10,24 +10,22 @@
 #define REFRESH_TIME_NS	100000000
 #define NS_PER_SEC	1000000000
 
-struct bpf_map_def SEC("maps") percpu_netcnt = {
+struct {
+	__u32 type;
+	struct bpf_cgroup_storage_key *key;
+	struct percpu_net_cnt *value;
+} percpu_netcnt SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
-	.key_size = sizeof(struct bpf_cgroup_storage_key),
-	.value_size = sizeof(struct percpu_net_cnt),
 };
 
-BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key,
-		     struct percpu_net_cnt);
-
-struct bpf_map_def SEC("maps") netcnt = {
+struct {
+	__u32 type;
+	struct bpf_cgroup_storage_key *key;
+	struct net_cnt *value;
+} netcnt SEC(".maps") = {
 	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
-	.key_size = sizeof(struct bpf_cgroup_storage_key),
-	.value_size = sizeof(struct net_cnt),
 };
 
-BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key,
-		     struct net_cnt);
-
 SEC("cgroup/skb")
 int bpf_nextcnt(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index af8cc68ed2f9..40d9c2853393 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -11,29 +11,31 @@ struct hmap_elem {
 	int var[VAR_NUM];
 };
 
-struct bpf_map_def SEC("maps") hash_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct hmap_elem *value;
+} hash_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct hmap_elem),
 	.max_entries = 1,
 };
 
-BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem);
-
 struct array_elem {
 	struct bpf_spin_lock lock;
 	int var[VAR_NUM];
 };
 
-struct bpf_map_def SEC("maps") array_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	int *key;
+	struct array_elem *value;
+} array_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct array_elem),
 	.max_entries = 1,
 };
 
-BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem);
-
 SEC("map_lock_demo")
 int bpf_map_lock_test(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index 45a1a1a2c345..6ac68be5d68b 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -4,24 +4,26 @@
 #include <linux/version.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") info_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u64 *value;
+} info_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
 	.max_entries = 1,
 };
 
-BPF_ANNOTATE_KV_PAIR(info_map, __u32, __u64);
-
-struct bpf_map_def SEC("maps") status_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u64 *value;
+} status_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
 	.max_entries = 1,
 };
 
-BPF_ANNOTATE_KV_PAIR(status_map, __u32, __u64);
-
 SEC("send_signal_demo")
 int bpf_send_signal_test(void *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index 1c39e4ccb7f1..c3d383d650cb 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -27,31 +27,43 @@ enum bpf_linum_array_idx {
 	__NR_BPF_LINUM_ARRAY_IDX,
 };
 
-struct bpf_map_def SEC("maps") addr_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct sockaddr_in6 *value;
+} addr_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct sockaddr_in6),
 	.max_entries = __NR_BPF_ADDR_ARRAY_IDX,
 };
 
-struct bpf_map_def SEC("maps") sock_result_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct bpf_sock *value;
+} sock_result_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_sock),
 	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,
 };
 
-struct bpf_map_def SEC("maps") tcp_sock_result_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct bpf_tcp_sock *value;
+} tcp_sock_result_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_tcp_sock),
 	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,
 };
 
-struct bpf_map_def SEC("maps") linum_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} linum_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = __NR_BPF_LINUM_ARRAY_IDX,
 };
 
@@ -60,26 +72,26 @@ struct bpf_spinlock_cnt {
 	__u32 cnt;
 };
 
-struct bpf_map_def SEC("maps") sk_pkt_out_cnt = {
+struct {
+	__u32 type;
+	__u32 map_flags;
+	int *key;
+	struct bpf_spinlock_cnt *value;
+} sk_pkt_out_cnt SEC(".maps") = {
 	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct bpf_spinlock_cnt),
-	.max_entries = 0,
 	.map_flags = BPF_F_NO_PREALLOC,
 };
 
-BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt, int, struct bpf_spinlock_cnt);
-
-struct bpf_map_def SEC("maps") sk_pkt_out_cnt10 = {
+struct {
+	__u32 type;
+	__u32 map_flags;
+	int *key;
+	struct bpf_spinlock_cnt *value;
+} sk_pkt_out_cnt10 SEC(".maps") = {
 	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct bpf_spinlock_cnt),
-	.max_entries = 0,
 	.map_flags = BPF_F_NO_PREALLOC,
 };
 
-BPF_ANNOTATE_KV_PAIR(sk_pkt_out_cnt10, int, struct bpf_spinlock_cnt);
-
 static bool is_loopback6(__u32 *a6)
 {
 	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 40f904312090..0a77ae36d981 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -10,30 +10,29 @@ struct hmap_elem {
 	int test_padding;
 };
 
-struct bpf_map_def SEC("maps") hmap = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	int *key;
+	struct hmap_elem *value;
+} hmap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct hmap_elem),
 	.max_entries = 1,
 };
 
-BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem);
-
-
 struct cls_elem {
 	struct bpf_spin_lock lock;
 	volatile int cnt;
 };
 
-struct bpf_map_def SEC("maps") cls_map = {
+struct {
+	__u32 type;
+	struct bpf_cgroup_storage_key *key;
+	struct cls_elem *value;
+} cls_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
-	.key_size = sizeof(struct bpf_cgroup_storage_key),
-	.value_size = sizeof(struct cls_elem),
 };
 
-BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key,
-		     struct cls_elem);
-
 struct bpf_vqueue {
 	struct bpf_spin_lock lock;
 	/* 4 byte hole */
@@ -42,14 +41,16 @@ struct bpf_vqueue {
 	unsigned int rate;
 };
 
-struct bpf_map_def SEC("maps") vqueue = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	int *key;
+	struct bpf_vqueue *value;
+} vqueue SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct bpf_vqueue),
 	.max_entries = 1,
 };
 
-BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue);
 #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
 
 SEC("spin_lock_demo")
-- 
cgit v1.2.3-59-g8ed1b


From df0b7792598291128fcca66b039fd027be25c10a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 12:26:59 -0700
Subject: selftests/bpf: convert tests w/ custom values to BTF-defined maps

Convert a bulk of selftests that have maps with custom (not integer) key
and/or value.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/bpf_flow.c       | 18 +++++--
 .../selftests/bpf/progs/socket_cookie_prog.c       | 11 ++--
 .../selftests/bpf/progs/test_get_stack_rawtp.c     | 27 +++++++---
 .../testing/selftests/bpf/progs/test_global_data.c | 27 ++++++----
 tools/testing/selftests/bpf/progs/test_l4lb.c      | 45 ++++++++++------
 .../selftests/bpf/progs/test_l4lb_noinline.c       | 45 ++++++++++------
 .../bpf/progs/test_select_reuseport_kern.c         | 45 +++++++++++-----
 .../selftests/bpf/progs/test_stacktrace_build_id.c | 44 +++++++++++-----
 .../selftests/bpf/progs/test_stacktrace_map.c      | 40 ++++++++++-----
 .../testing/selftests/bpf/progs/test_tcp_estats.c  |  9 ++--
 .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 18 ++++---
 .../selftests/bpf/progs/test_tcpnotify_kern.c      | 18 +++++--
 tools/testing/selftests/bpf/progs/test_xdp.c       | 18 ++++---
 .../selftests/bpf/progs/test_xdp_noinline.c        | 60 +++++++++++++---------
 14 files changed, 285 insertions(+), 140 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 81ad9a0b29d0..849f42e548b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -57,17 +57,25 @@ struct frag_hdr {
 	__be32 identification;
 };
 
-struct bpf_map_def SEC("maps") jmp_table = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 key_size;
+	__u32 value_size;
+} jmp_table SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PROG_ARRAY,
+	.max_entries = 8,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u32),
-	.max_entries = 8
 };
 
-struct bpf_map_def SEC("maps") last_dissection = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct bpf_flow_keys *value;
+} last_dissection SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_flow_keys),
 	.max_entries = 1,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
index 0db15c3210ad..6aabb681fb9a 100644
--- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
@@ -12,15 +12,16 @@ struct socket_cookie {
 	__u32 cookie_value;
 };
 
-struct bpf_map_def SEC("maps") socket_cookies = {
+struct {
+	__u32 type;
+	__u32 map_flags;
+	int *key;
+	struct socket_cookie *value;
+} socket_cookies SEC(".maps") = {
 	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct socket_cookie),
 	.map_flags = BPF_F_NO_PREALLOC,
 };
 
-BPF_ANNOTATE_KV_PAIR(socket_cookies, int, struct socket_cookie);
-
 SEC("cgroup/connect6")
 int set_cookie(struct bpf_sock_addr *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index f6d9f238e00a..aaa6ec250e15 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -15,17 +15,25 @@ struct stack_trace_t {
 	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
 };
 
-struct bpf_map_def SEC("maps") perfmap = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 key_size;
+	__u32 value_size;
+} perfmap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.max_entries = 2,
 	.key_size = sizeof(int),
 	.value_size = sizeof(__u32),
-	.max_entries = 2,
 };
 
-struct bpf_map_def SEC("maps") stackdata_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct stack_trace_t *value;
+} stackdata_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct stack_trace_t),
 	.max_entries = 1,
 };
 
@@ -47,10 +55,13 @@ struct bpf_map_def SEC("maps") stackdata_map = {
  * issue and avoid complicated C programming massaging.
  * This is an acceptable workaround since there is one entry here.
  */
-struct bpf_map_def SEC("maps") rawdata_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u64 (*value)[2 * MAX_STACK_RAWTP];
+} rawdata_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
 	.max_entries = 1,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 5ab14e941980..866cc7ddbe43 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -7,17 +7,23 @@
 
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") result_number = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u64 *value;
+} result_number SEC(".maps") = {
 	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(__u32),
-	.value_size	= sizeof(__u64),
 	.max_entries	= 11,
 };
 
-struct bpf_map_def SEC("maps") result_string = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	const char (*value)[32];
+} result_string SEC(".maps") = {
 	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(__u32),
-	.value_size	= 32,
 	.max_entries	= 5,
 };
 
@@ -27,10 +33,13 @@ struct foo {
 	__u64 c;
 };
 
-struct bpf_map_def SEC("maps") result_struct = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct foo *value;
+} result_struct SEC(".maps") = {
 	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(__u32),
-	.value_size	= sizeof(struct foo),
 	.max_entries	= 5,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 1e10c9590991..848cbb90f581 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -169,38 +169,53 @@ struct eth_hdr {
 	unsigned short eth_proto;
 };
 
-struct bpf_map_def SEC("maps") vip_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	struct vip *key;
+	struct vip_meta *value;
+} vip_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip),
-	.value_size = sizeof(struct vip_meta),
 	.max_entries = MAX_VIPS,
 };
 
-struct bpf_map_def SEC("maps") ch_rings = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} ch_rings SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = CH_RINGS_SIZE,
 };
 
-struct bpf_map_def SEC("maps") reals = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct real_definition *value;
+} reals SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct real_definition),
 	.max_entries = MAX_REALS,
 };
 
-struct bpf_map_def SEC("maps") stats = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct vip_stats *value;
+} stats SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct vip_stats),
 	.max_entries = MAX_VIPS,
 };
 
-struct bpf_map_def SEC("maps") ctl_array = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct ctl_value *value;
+} ctl_array SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct ctl_value),
 	.max_entries = CTL_MAP_SIZE,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index ba44a14e6dc4..c63ecf3ca573 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -165,38 +165,53 @@ struct eth_hdr {
 	unsigned short eth_proto;
 };
 
-struct bpf_map_def SEC("maps") vip_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	struct vip *key;
+	struct vip_meta *value;
+} vip_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip),
-	.value_size = sizeof(struct vip_meta),
 	.max_entries = MAX_VIPS,
 };
 
-struct bpf_map_def SEC("maps") ch_rings = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} ch_rings SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = CH_RINGS_SIZE,
 };
 
-struct bpf_map_def SEC("maps") reals = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct real_definition *value;
+} reals SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct real_definition),
 	.max_entries = MAX_REALS,
 };
 
-struct bpf_map_def SEC("maps") stats = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct vip_stats *value;
+} stats SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct vip_stats),
 	.max_entries = MAX_VIPS,
 };
 
-struct bpf_map_def SEC("maps") ctl_array = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct ctl_value *value;
+} ctl_array SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct ctl_value),
 	.max_entries = CTL_MAP_SIZE,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 5b54ec637ada..435a9527733e 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -21,38 +21,55 @@ int _version SEC("version") = 1;
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
 
-struct bpf_map_def SEC("maps") outer_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 key_size;
+	__u32 value_size;
+} outer_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.max_entries = 1,
 	.key_size = sizeof(__u32),
 	.value_size = sizeof(__u32),
-	.max_entries = 1,
 };
 
-struct bpf_map_def SEC("maps") result_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} result_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = NR_RESULTS,
 };
 
-struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	int *value;
+} tmp_index_ovr_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(int),
 	.max_entries = 1,
 };
 
-struct bpf_map_def SEC("maps") linum_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} linum_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = 1,
 };
 
-struct bpf_map_def SEC("maps") data_check_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct data_check *value;
+} data_check_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct data_check),
 	.max_entries = 1,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index d86c281e957f..fcf2280bb60c 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -8,34 +8,50 @@
 #define PERF_MAX_STACK_DEPTH         127
 #endif
 
-struct bpf_map_def SEC("maps") control_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} control_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = 1,
 };
 
-struct bpf_map_def SEC("maps") stackid_hmap = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} stackid_hmap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = 16384,
 };
 
-struct bpf_map_def SEC("maps") stackmap = {
+typedef struct bpf_stack_build_id stack_trace_t[PERF_MAX_STACK_DEPTH];
+
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 map_flags;
+	__u32 key_size;
+	__u32 value_size;
+} stackmap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_stack_build_id)
-		* PERF_MAX_STACK_DEPTH,
 	.max_entries = 128,
 	.map_flags = BPF_F_STACK_BUILD_ID,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(stack_trace_t),
 };
 
-struct bpf_map_def SEC("maps") stack_amap = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	/* there seems to be a bug in kernel not handling typedef properly */
+	struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH];
+} stack_amap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct bpf_stack_build_id)
-		* PERF_MAX_STACK_DEPTH,
 	.max_entries = 128,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index af111af7ca1a..7ad09adbf648 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -8,31 +8,47 @@
 #define PERF_MAX_STACK_DEPTH         127
 #endif
 
-struct bpf_map_def SEC("maps") control_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} control_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = 1,
 };
 
-struct bpf_map_def SEC("maps") stackid_hmap = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} stackid_hmap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = 16384,
 };
 
-struct bpf_map_def SEC("maps") stackmap = {
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 key_size;
+	__u32 value_size;
+} stackmap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
 	.max_entries = 16384,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(stack_trace_t),
 };
 
-struct bpf_map_def SEC("maps") stack_amap = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u64 (*value)[PERF_MAX_STACK_DEPTH];
+} stack_amap SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
 	.max_entries = 16384,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index bee3bbecc0c4..df98f7e32832 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -148,10 +148,13 @@ struct tcp_estats_basic_event {
 	struct tcp_estats_conn_id conn_id;
 };
 
-struct bpf_map_def SEC("maps") ev_record_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct tcp_estats_basic_event *value;
+} ev_record_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct tcp_estats_basic_event),
 	.max_entries = 1024,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index c7c3240e0dd4..38e10c9fd996 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -14,17 +14,23 @@
 #include "bpf_endian.h"
 #include "test_tcpbpf.h"
 
-struct bpf_map_def SEC("maps") global_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct tcpbpf_globals *value;
+} global_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct tcpbpf_globals),
 	.max_entries = 4,
 };
 
-struct bpf_map_def SEC("maps") sockopt_results = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	int *value;
+} sockopt_results SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(int),
 	.max_entries = 2,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index ec6db6e64c41..d073d37d4e27 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -14,18 +14,26 @@
 #include "bpf_endian.h"
 #include "test_tcpnotify.h"
 
-struct bpf_map_def SEC("maps") global_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct tcpnotify_globals *value;
+} global_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct tcpnotify_globals),
 	.max_entries = 4,
 };
 
-struct bpf_map_def SEC("maps") perf_event_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 key_size;
+	__u32 value_size;
+} perf_event_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.max_entries = 2,
 	.key_size = sizeof(int),
 	.value_size = sizeof(__u32),
-	.max_entries = 2,
 };
 
 int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 5e7df8bb5b5d..ec3d2c1c8cf9 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -22,17 +22,23 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def SEC("maps") rxcnt = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u64 *value;
+} rxcnt SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
 	.max_entries = 256,
 };
 
-struct bpf_map_def SEC("maps") vip2tnl = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	struct vip *key;
+	struct iptnl_info *value;
+} vip2tnl SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip),
-	.value_size = sizeof(struct iptnl_info),
 	.max_entries = MAX_IPTNL_ENTRIES,
 };
 
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 4fe6aaad22a4..d2eddb5553d1 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -163,52 +163,66 @@ struct lb_stats {
 	__u64 v1;
 };
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	struct vip_definition *key;
+	struct vip_meta *value;
+} vip_map SEC(".maps") = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip_definition),
-	.value_size = sizeof(struct vip_meta),
 	.max_entries = 512,
-	.map_flags = 0,
 };
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 map_flags;
+	struct flow_key *key;
+	struct real_pos_lru *value;
+} lru_cache SEC(".maps") = {
 	.type = BPF_MAP_TYPE_LRU_HASH,
-	.key_size = sizeof(struct flow_key),
-	.value_size = sizeof(struct real_pos_lru),
 	.max_entries = 300,
 	.map_flags = 1U << 1,
 };
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	__u32 *value;
+} ch_rings SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
 	.max_entries = 12 * 655,
-	.map_flags = 0,
 };
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct real_definition *value;
+} reals SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct real_definition),
 	.max_entries = 40,
-	.map_flags = 0,
 };
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct lb_stats *value;
+} stats SEC(".maps") = {
 	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct lb_stats),
 	.max_entries = 515,
-	.map_flags = 0,
 };
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
+struct {
+	__u32 type;
+	__u32 max_entries;
+	__u32 *key;
+	struct ctl_value *value;
+} ctl_array SEC(".maps") = {
 	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct ctl_value),
 	.max_entries = 16,
-	.map_flags = 0,
 };
 
 struct eth_hdr {
-- 
cgit v1.2.3-59-g8ed1b


From 3cfa148826e3c666da1cc2a43fbe8689e2650636 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 17 Jun 2019 16:02:28 +0200
Subject: selftests: rtnetlink: add addresses with fixed life time

This exercises kernel code path that deal with addresses that have
a limited lifetime.

Without previous fix, this triggers following crash on net-next:
 BUG: KASAN: null-ptr-deref in check_lifetime+0x403/0x670
 Read of size 8 at addr 0000000000000010 by task kworker [..]

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index b25c9fe019d2..ed606a2e3865 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -249,6 +249,26 @@ kci_test_route_get()
 	echo "PASS: route get"
 }
 
+kci_test_addrlft()
+{
+	for i in $(seq 10 100) ;do
+		lft=$(((RANDOM%3) + 1))
+		ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
+		check_err $?
+	done
+
+	sleep 5
+
+	ip addr show dev "$devdummy" | grep "10.23.11."
+	if [ $? -eq 0 ]; then
+		echo "FAIL: preferred_lft addresses remaining"
+		check_err 1
+		return
+	fi
+
+	echo "PASS: preferred_lft addresses have expired"
+}
+
 kci_test_addrlabel()
 {
 	ret=0
@@ -1140,6 +1160,7 @@ kci_test_rtnl()
 
 	kci_test_polrouting
 	kci_test_route_get
+	kci_test_addrlft
 	kci_test_tc
 	kci_test_gre
 	kci_test_gretap
-- 
cgit v1.2.3-59-g8ed1b


From 79ebc3c26010eba3b6eaaad0efdf1f8ec19aaf9d Mon Sep 17 00:00:00 2001
From: Fred Klassen <fklassen@appneta.com>
Date: Mon, 17 Jun 2019 12:08:35 -0700
Subject: net/udpgso_bench_tx: options to exercise TX CMSG

This enhancement adds options that facilitate load testing with
additional TX CMSG options, and to optionally print results of
various send CMSG operations.

These options are especially useful in isolating situations
where error-queue messages are lost when combined with other
CMSG operations (e.g. SO_ZEROCOPY).

New options:
    -a - count all CMSG messages and match to sent messages
    -T - add TX CMSG that requests TX software timestamps
    -H - similar to -T except request TX hardware timestamps
    -P - call poll() before reading error queue
    -v - print detailed results

v2: Enhancements as per Willem de Bruijn <willemb@google.com>
    - Updated control and buffer parameters for recvmsg
    - poll() parameter cleanup
    - fail on bad audit results
    - remove TOS options
    - improved reporting

v3: Enhancements as per Willem de Bruijn <willemb@google.com>
    - add SOF_TIMESTAMPING_OPT_TSONLY to eliminate MSG_TRUNC
    - general code cleanup

Signed-off-by: Fred Klassen <fklassen@appneta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgso_bench_tx.c | 291 ++++++++++++++++++++++++--
 1 file changed, 276 insertions(+), 15 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 4074538b5df5..dfa83ad57206 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -5,6 +5,8 @@
 #include <arpa/inet.h>
 #include <errno.h>
 #include <error.h>
+#include <linux/errqueue.h>
+#include <linux/net_tstamp.h>
 #include <netinet/if_ether.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
@@ -19,6 +21,7 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <sys/time.h>
+#include <sys/poll.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -34,6 +37,10 @@
 #define SO_ZEROCOPY	60
 #endif
 
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY 5
+#endif
+
 #ifndef MSG_ZEROCOPY
 #define MSG_ZEROCOPY	0x4000000
 #endif
@@ -48,12 +55,24 @@ static uint16_t	cfg_mss;
 static int	cfg_payload_len	= (1472 * 42);
 static int	cfg_port	= 8000;
 static int	cfg_runtime_ms	= -1;
+static bool	cfg_poll;
 static bool	cfg_segment;
 static bool	cfg_sendmmsg;
 static bool	cfg_tcp;
+static uint32_t	cfg_tx_ts = SOF_TIMESTAMPING_TX_SOFTWARE;
+static bool	cfg_tx_tstamp;
+static bool	cfg_audit;
+static bool	cfg_verbose;
 static bool	cfg_zerocopy;
 static int	cfg_msg_nr;
 static uint16_t	cfg_gso_size;
+static unsigned long total_num_msgs;
+static unsigned long total_num_sends;
+static unsigned long stat_tx_ts;
+static unsigned long stat_tx_ts_errors;
+static unsigned long tstart;
+static unsigned long tend;
+static unsigned long stat_zcopies;
 
 static socklen_t cfg_alen;
 static struct sockaddr_storage cfg_dst_addr;
@@ -110,23 +129,125 @@ static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
 	}
 }
 
-static void flush_zerocopy(int fd)
+static void flush_cmsg(struct cmsghdr *cmsg)
 {
-	struct msghdr msg = {0};	/* flush */
+	struct sock_extended_err *err;
+	struct scm_timestamping *tss;
+	__u32 lo;
+	__u32 hi;
+	int i;
+
+	switch (cmsg->cmsg_level) {
+	case SOL_SOCKET:
+		if (cmsg->cmsg_type == SO_TIMESTAMPING) {
+			i = (cfg_tx_ts == SOF_TIMESTAMPING_TX_HARDWARE) ? 2 : 0;
+			tss = (struct scm_timestamping *)CMSG_DATA(cmsg);
+			if (tss->ts[i].tv_sec == 0)
+				stat_tx_ts_errors++;
+		} else {
+			error(1, 0, "unknown SOL_SOCKET cmsg type=%u\n",
+			      cmsg->cmsg_type);
+		}
+		break;
+	case SOL_IP:
+	case SOL_IPV6:
+		switch (cmsg->cmsg_type) {
+		case IP_RECVERR:
+		case IPV6_RECVERR:
+		{
+			err = (struct sock_extended_err *)CMSG_DATA(cmsg);
+			switch (err->ee_origin) {
+			case SO_EE_ORIGIN_TIMESTAMPING:
+				/* Got a TX timestamp from error queue */
+				stat_tx_ts++;
+				break;
+			case SO_EE_ORIGIN_ICMP:
+			case SO_EE_ORIGIN_ICMP6:
+				if (cfg_verbose)
+					fprintf(stderr,
+						"received ICMP error: type=%u, code=%u\n",
+						err->ee_type, err->ee_code);
+				break;
+			case SO_EE_ORIGIN_ZEROCOPY:
+			{
+				lo = err->ee_info;
+				hi = err->ee_data;
+				/* range of IDs acknowledged */
+				stat_zcopies += hi - lo + 1;
+				break;
+			}
+			case SO_EE_ORIGIN_LOCAL:
+				if (cfg_verbose)
+					fprintf(stderr,
+						"received packet with local origin: %u\n",
+						err->ee_origin);
+				break;
+			default:
+				error(0, 1, "received packet with origin: %u",
+				      err->ee_origin);
+			}
+			break;
+		}
+		default:
+			error(0, 1, "unknown IP msg type=%u\n",
+			      cmsg->cmsg_type);
+			break;
+		}
+		break;
+	default:
+		error(0, 1, "unknown cmsg level=%u\n",
+		      cmsg->cmsg_level);
+	}
+}
+
+static void flush_errqueue_recv(int fd)
+{
+	char control[CMSG_SPACE(sizeof(struct scm_timestamping)) +
+		     CMSG_SPACE(sizeof(struct sock_extended_err)) +
+		     CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
+	struct msghdr msg = {0};
+	struct cmsghdr *cmsg;
 	int ret;
 
 	while (1) {
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
 		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
 		if (ret == -1 && errno == EAGAIN)
 			break;
 		if (ret == -1)
 			error(1, errno, "errqueue");
-		if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+		if (msg.msg_flags != MSG_ERRQUEUE)
 			error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+		if (cfg_audit) {
+			for (cmsg = CMSG_FIRSTHDR(&msg);
+					cmsg;
+					cmsg = CMSG_NXTHDR(&msg, cmsg))
+				flush_cmsg(cmsg);
+		}
 		msg.msg_flags = 0;
 	}
 }
 
+static void flush_errqueue(int fd, const bool do_poll)
+{
+	if (do_poll) {
+		struct pollfd fds = {0};
+		int ret;
+
+		fds.fd = fd;
+		ret = poll(&fds, 1, 500);
+		if (ret == 0) {
+			if (cfg_verbose)
+				fprintf(stderr, "poll timeout\n");
+		} else if (ret < 0) {
+			error(1, errno, "poll");
+		}
+	}
+
+	flush_errqueue_recv(fd);
+}
+
 static int send_tcp(int fd, char *data)
 {
 	int ret, done = 0, count = 0;
@@ -168,16 +289,40 @@ static int send_udp(int fd, char *data)
 	return count;
 }
 
+static void send_ts_cmsg(struct cmsghdr *cm)
+{
+	uint32_t *valp;
+
+	cm->cmsg_level = SOL_SOCKET;
+	cm->cmsg_type = SO_TIMESTAMPING;
+	cm->cmsg_len = CMSG_LEN(sizeof(cfg_tx_ts));
+	valp = (void *)CMSG_DATA(cm);
+	*valp = cfg_tx_ts;
+}
+
 static int send_udp_sendmmsg(int fd, char *data)
 {
+	char control[CMSG_SPACE(sizeof(cfg_tx_ts))] = {0};
 	const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
 	struct mmsghdr mmsgs[max_nr_msg];
 	struct iovec iov[max_nr_msg];
 	unsigned int off = 0, left;
+	size_t msg_controllen = 0;
 	int i = 0, ret;
 
 	memset(mmsgs, 0, sizeof(mmsgs));
 
+	if (cfg_tx_tstamp) {
+		struct msghdr msg = {0};
+		struct cmsghdr *cmsg;
+
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msg);
+		send_ts_cmsg(cmsg);
+		msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts));
+	}
+
 	left = cfg_payload_len;
 	while (left) {
 		if (i == max_nr_msg)
@@ -189,6 +334,13 @@ static int send_udp_sendmmsg(int fd, char *data)
 		mmsgs[i].msg_hdr.msg_iov = iov + i;
 		mmsgs[i].msg_hdr.msg_iovlen = 1;
 
+		mmsgs[i].msg_hdr.msg_name = (void *)&cfg_dst_addr;
+		mmsgs[i].msg_hdr.msg_namelen = cfg_alen;
+		if (msg_controllen) {
+			mmsgs[i].msg_hdr.msg_control = control;
+			mmsgs[i].msg_hdr.msg_controllen = msg_controllen;
+		}
+
 		off += iov[i].iov_len;
 		left -= iov[i].iov_len;
 		i++;
@@ -214,9 +366,12 @@ static void send_udp_segment_cmsg(struct cmsghdr *cm)
 
 static int send_udp_segment(int fd, char *data)
 {
-	char control[CMSG_SPACE(sizeof(cfg_gso_size))] = {0};
+	char control[CMSG_SPACE(sizeof(cfg_gso_size)) +
+		     CMSG_SPACE(sizeof(cfg_tx_ts))] = {0};
 	struct msghdr msg = {0};
 	struct iovec iov = {0};
+	size_t msg_controllen;
+	struct cmsghdr *cmsg;
 	int ret;
 
 	iov.iov_base = data;
@@ -227,8 +382,16 @@ static int send_udp_segment(int fd, char *data)
 
 	msg.msg_control = control;
 	msg.msg_controllen = sizeof(control);
-	send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+	cmsg = CMSG_FIRSTHDR(&msg);
+	send_udp_segment_cmsg(cmsg);
+	msg_controllen = CMSG_SPACE(sizeof(cfg_mss));
+	if (cfg_tx_tstamp) {
+		cmsg = CMSG_NXTHDR(&msg, cmsg);
+		send_ts_cmsg(cmsg);
+		msg_controllen += CMSG_SPACE(sizeof(cfg_tx_ts));
+	}
 
+	msg.msg_controllen = msg_controllen;
 	msg.msg_name = (void *)&cfg_dst_addr;
 	msg.msg_namelen = cfg_alen;
 
@@ -243,7 +406,7 @@ static int send_udp_segment(int fd, char *data)
 
 static void usage(const char *filepath)
 {
-	error(1, 0, "Usage: %s [-46cmtuz] [-C cpu] [-D dst ip] [-l secs] [-m messagenr] [-p port] [-s sendsize] [-S gsosize]",
+	error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
 		    filepath);
 }
 
@@ -252,7 +415,7 @@ static void parse_opts(int argc, char **argv)
 	int max_len, hdrlen;
 	int c;
 
-	while ((c = getopt(argc, argv, "46cC:D:l:mM:p:s:S:tuz")) != -1) {
+	while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) {
 		switch (c) {
 		case '4':
 			if (cfg_family != PF_UNSPEC)
@@ -266,6 +429,9 @@ static void parse_opts(int argc, char **argv)
 			cfg_family = PF_INET6;
 			cfg_alen = sizeof(struct sockaddr_in6);
 			break;
+		case 'a':
+			cfg_audit = true;
+			break;
 		case 'c':
 			cfg_cache_trash = true;
 			break;
@@ -287,6 +453,9 @@ static void parse_opts(int argc, char **argv)
 		case 'p':
 			cfg_port = strtoul(optarg, NULL, 0);
 			break;
+		case 'P':
+			cfg_poll = true;
+			break;
 		case 's':
 			cfg_payload_len = strtoul(optarg, NULL, 0);
 			break;
@@ -294,12 +463,22 @@ static void parse_opts(int argc, char **argv)
 			cfg_gso_size = strtoul(optarg, NULL, 0);
 			cfg_segment = true;
 			break;
+		case 'H':
+			cfg_tx_ts = SOF_TIMESTAMPING_TX_HARDWARE;
+			cfg_tx_tstamp = true;
+			break;
 		case 't':
 			cfg_tcp = true;
 			break;
+		case 'T':
+			cfg_tx_tstamp = true;
+			break;
 		case 'u':
 			cfg_connected = false;
 			break;
+		case 'v':
+			cfg_verbose = true;
+			break;
 		case 'z':
 			cfg_zerocopy = true;
 			break;
@@ -315,6 +494,8 @@ static void parse_opts(int argc, char **argv)
 		error(1, 0, "connectionless tcp makes no sense");
 	if (cfg_segment && cfg_sendmmsg)
 		error(1, 0, "cannot combine segment offload and sendmmsg");
+	if (cfg_tx_tstamp && !(cfg_segment || cfg_sendmmsg))
+		error(1, 0, "Options -T and -H require either -S or -m option");
 
 	if (cfg_family == PF_INET)
 		hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
@@ -349,6 +530,75 @@ static void set_pmtu_discover(int fd, bool is_ipv4)
 		error(1, errno, "setsockopt path mtu");
 }
 
+static void set_tx_timestamping(int fd)
+{
+	int val = SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID |
+			SOF_TIMESTAMPING_OPT_TSONLY;
+
+	if (cfg_tx_ts == SOF_TIMESTAMPING_TX_SOFTWARE)
+		val |= SOF_TIMESTAMPING_SOFTWARE;
+	else
+		val |= SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)))
+		error(1, errno, "setsockopt tx timestamping");
+}
+
+static void print_audit_report(unsigned long num_msgs, unsigned long num_sends)
+{
+	unsigned long tdelta;
+
+	tdelta = tend - tstart;
+	if (!tdelta)
+		return;
+
+	fprintf(stderr, "Summary over %lu.%03lu seconds...\n",
+			tdelta / 1000, tdelta % 1000);
+	fprintf(stderr,
+		"sum %s tx: %6lu MB/s %10lu calls (%lu/s) %10lu msgs (%lu/s)\n",
+		cfg_tcp ? "tcp" : "udp",
+		((num_msgs * cfg_payload_len) >> 10) / tdelta,
+		num_sends, num_sends * 1000 / tdelta,
+		num_msgs, num_msgs * 1000 / tdelta);
+
+	if (cfg_tx_tstamp) {
+		if (stat_tx_ts_errors)
+			error(1, 0,
+			      "Expected clean TX Timestamps: %9lu msgs received %6lu errors",
+			      stat_tx_ts, stat_tx_ts_errors);
+		if (stat_tx_ts != num_sends)
+			error(1, 0,
+			      "Unexpected number of TX Timestamps: %9lu expected %9lu received",
+			      num_sends, stat_tx_ts);
+		fprintf(stderr,
+			"Tx Timestamps: %19lu received %17lu errors\n",
+			stat_tx_ts, stat_tx_ts_errors);
+	}
+
+	if (cfg_zerocopy) {
+		if (stat_zcopies != num_sends)
+			error(1, 0, "Unexpected number of Zerocopy completions: %9lu expected %9lu received",
+			      num_sends, stat_zcopies);
+		fprintf(stderr,
+			"Zerocopy acks: %19lu\n",
+			stat_zcopies);
+	}
+}
+
+static void print_report(unsigned long num_msgs, unsigned long num_sends)
+{
+	fprintf(stderr,
+		"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+		cfg_tcp ? "tcp" : "udp",
+		(num_msgs * cfg_payload_len) >> 20,
+		num_sends, num_msgs);
+
+	if (cfg_audit) {
+		total_num_msgs += num_msgs;
+		total_num_sends += num_sends;
+	}
+}
+
 int main(int argc, char **argv)
 {
 	unsigned long num_msgs, num_sends;
@@ -384,8 +634,13 @@ int main(int argc, char **argv)
 	if (cfg_segment)
 		set_pmtu_discover(fd, cfg_family == PF_INET);
 
+	if (cfg_tx_tstamp)
+		set_tx_timestamping(fd);
+
 	num_msgs = num_sends = 0;
 	tnow = gettimeofday_ms();
+	tstart = tnow;
+	tend = tnow;
 	tstop = tnow + cfg_runtime_ms;
 	treport = tnow + 1000;
 
@@ -400,19 +655,15 @@ int main(int argc, char **argv)
 		else
 			num_sends += send_udp(fd, buf[i]);
 		num_msgs++;
-		if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
-			flush_zerocopy(fd);
+		if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp)
+			flush_errqueue(fd, cfg_poll);
 
 		if (cfg_msg_nr && num_msgs >= cfg_msg_nr)
 			break;
 
 		tnow = gettimeofday_ms();
-		if (tnow > treport) {
-			fprintf(stderr,
-				"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
-				cfg_tcp ? "tcp" : "udp",
-				(num_msgs * cfg_payload_len) >> 20,
-				num_sends, num_msgs);
+		if (tnow >= treport) {
+			print_report(num_msgs, num_sends);
 			num_msgs = num_sends = 0;
 			treport = tnow + 1000;
 		}
@@ -423,8 +674,18 @@ int main(int argc, char **argv)
 
 	} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
 
+	if (cfg_zerocopy || cfg_tx_tstamp)
+		flush_errqueue(fd, true);
+
 	if (close(fd))
 		error(1, errno, "close");
 
+	if (cfg_audit) {
+		tend = tnow;
+		total_num_msgs += num_msgs;
+		total_num_sends += num_sends;
+		print_audit_report(total_num_msgs, total_num_sends);
+	}
+
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From ade90d69ffefd99e5e655ec6d976a96f010bb5b3 Mon Sep 17 00:00:00 2001
From: Fred Klassen <fklassen@appneta.com>
Date: Mon, 17 Jun 2019 12:08:36 -0700
Subject: net/udpgso_bench.sh add UDP GSO audit tests

Audit tests count the total number of messages sent and compares
with total number of CMSG received on error queue. Example:

    udp gso zerocopy timestamp audit
    udp rx:   1599 MB/s  1166414 calls/s
    udp tx:   1615 MB/s    27395 calls/s  27395 msg/s
    udp rx:   1634 MB/s  1192261 calls/s
    udp tx:   1633 MB/s    27699 calls/s  27699 msg/s
    udp rx:   1633 MB/s  1191358 calls/s
    udp tx:   1631 MB/s    27678 calls/s  27678 msg/s
    Summary over 4.000 seconds...
    sum udp tx:   1665 MB/s      82772 calls (27590/s)      82772 msgs (27590/s)
    Tx Timestamps:               82772 received                 0 errors
    Zerocopy acks:               82772 received

Errors are thrown if CMSG count does not equal send count,
example:

    Summary over 4.000 seconds...
    sum tcp tx:   7451 MB/s     493706 calls (123426/s)     493706 msgs (123426/s)
    ./udpgso_bench_tx: Unexpected number of Zerocopy completions:    493706 expected    493704 received

Also reduce individual test time from 4 to 3 seconds so that
overall test time does not increase significantly.

v3: Enhancements as per Willem de Bruijn <willemb@google.com>
    - document -P option for TCP audit

Signed-off-by: Fred Klassen <fklassen@appneta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgso_bench.sh | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 5670a9ffd8eb..d4d831dfd44d 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -38,6 +38,18 @@ run_udp() {
 
 	echo "udp gso zerocopy"
 	run_in_netns ${args} -S 0 -z
+
+	echo "udp gso timestamp"
+	run_in_netns ${args} -S 0 -T
+
+	echo "udp gso zerocopy audit"
+	run_in_netns ${args} -S 0 -z -a
+
+	echo "udp gso timestamp audit"
+	run_in_netns ${args} -S 0 -T -a
+
+	echo "udp gso zerocopy timestamp audit"
+	run_in_netns ${args} -S 0 -T -z -a
 }
 
 run_tcp() {
@@ -48,10 +60,15 @@ run_tcp() {
 
 	echo "tcp zerocopy"
 	run_in_netns ${args} -t -z
+
+	# excluding for now because test fails intermittently
+	# add -P option to include poll() to reduce possibility of lost messages
+	#echo "tcp zerocopy audit"
+	#run_in_netns ${args} -t -z -P -a
 }
 
 run_all() {
-	local -r core_args="-l 4"
+	local -r core_args="-l 3"
 	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
 	local -r ipv6_args="${core_args} -6 -D ::1"
 
-- 
cgit v1.2.3-59-g8ed1b


From 4ffc37f5c07791ffe1de59a7c08fdf8d7f585a60 Mon Sep 17 00:00:00 2001
From: Fred Klassen <fklassen@appneta.com>
Date: Mon, 17 Jun 2019 12:08:37 -0700
Subject: net/udpgso_bench.sh test fails on error

Ensure that failure on any individual test results in an overall
failure of the test script.

Signed-off-by: Fred Klassen <fklassen@appneta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgso_bench.sh | 33 +++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index d4d831dfd44d..4df1cd8d69d2 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -3,6 +3,10 @@
 #
 # Run a series of udpgso benchmarks
 
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
 wake_children() {
 	local -r jobs="$(jobs -p)"
 
@@ -29,60 +33,89 @@ run_in_netns() {
 
 run_udp() {
 	local -r args=$@
+	local errors=0
 
 	echo "udp"
 	run_in_netns ${args}
+	errors=$(( $errors + $? ))
 
 	echo "udp gso"
 	run_in_netns ${args} -S 0
+	errors=$(( $errors + $? ))
 
 	echo "udp gso zerocopy"
 	run_in_netns ${args} -S 0 -z
+	errors=$(( $errors + $? ))
 
 	echo "udp gso timestamp"
 	run_in_netns ${args} -S 0 -T
+	errors=$(( $errors + $? ))
 
 	echo "udp gso zerocopy audit"
 	run_in_netns ${args} -S 0 -z -a
+	errors=$(( $errors + $? ))
 
 	echo "udp gso timestamp audit"
 	run_in_netns ${args} -S 0 -T -a
+	errors=$(( $errors + $? ))
 
 	echo "udp gso zerocopy timestamp audit"
 	run_in_netns ${args} -S 0 -T -z -a
+	errors=$(( $errors + $? ))
+
+	return $errors
 }
 
 run_tcp() {
 	local -r args=$@
+	local errors=0
 
 	echo "tcp"
 	run_in_netns ${args} -t
+	errors=$(( $errors + $? ))
 
 	echo "tcp zerocopy"
 	run_in_netns ${args} -t -z
+	errors=$(( $errors + $? ))
 
 	# excluding for now because test fails intermittently
 	# add -P option to include poll() to reduce possibility of lost messages
 	#echo "tcp zerocopy audit"
 	#run_in_netns ${args} -t -z -P -a
+	#errors=$(( $errors + $? ))
+
+	return $errors
 }
 
 run_all() {
 	local -r core_args="-l 3"
 	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
 	local -r ipv6_args="${core_args} -6 -D ::1"
+	local errors=0
 
 	echo "ipv4"
 	run_tcp "${ipv4_args}"
+	errors=$(( $errors + $? ))
 	run_udp "${ipv4_args}"
+	errors=$(( $errors + $? ))
 
 	echo "ipv6"
 	run_tcp "${ipv4_args}"
+	errors=$(( $errors + $? ))
 	run_udp "${ipv6_args}"
+	errors=$(( $errors + $? ))
+
+	return $errors
 }
 
 if [[ $# -eq 0 ]]; then
 	run_all
+	if [ $? -ne 0 ]; then
+		echo -e "$(basename $0): ${RED}FAIL${NC}"
+		exit 1
+	fi
+
+	echo -e "$(basename $0): ${GREEN}PASS${NC}"
 elif [[ $1 == "__subprocess" ]]; then
 	shift
 	run_one $@
-- 
cgit v1.2.3-59-g8ed1b


From 12ee8220399995cc63fe590d9b01e94921478ad8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 18 Jun 2019 18:12:58 +0300
Subject: selftests: mlxsw: Add a test for FIB offload indication

Test that the offload indication for unicast routes is correctly set in
different scenarios. IPv4 support will be added in the future.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/mlxsw/fib_offload.sh     | 349 +++++++++++++++++++++
 1 file changed, 349 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
new file mode 100755
index 000000000000..e99ae500f387
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
@@ -0,0 +1,349 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test unicast FIB offload indication.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv6_route_add
+	ipv6_route_replace
+	ipv6_route_nexthop_group_share
+	ipv6_route_rate
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+tor1_create()
+{
+	simple_if_init $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+tor1_destroy()
+{
+	simple_if_fini $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+tor2_create()
+{
+	simple_if_init $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+tor2_destroy()
+{
+	simple_if_fini $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+spine_create()
+{
+	ip link set dev $spine_p1 up
+	ip link set dev $spine_p2 up
+
+	__addr_add_del $spine_p1 add 2001:db8:1::1/64
+	__addr_add_del $spine_p2 add 2001:db8:2::1/64
+}
+
+spine_destroy()
+{
+	__addr_add_del $spine_p2 del 2001:db8:2::1/64
+	__addr_add_del $spine_p1 del 2001:db8:1::1/64
+
+	ip link set dev $spine_p2 down
+	ip link set dev $spine_p1 down
+}
+
+ipv6_offload_check()
+{
+	local pfx="$1"; shift
+	local expected_num=$1; shift
+	local num
+
+	# Try to avoid races with route offload
+	sleep .1
+
+	num=$(ip -6 route show match ${pfx} | grep "offload" | wc -l)
+
+	if [ $num -eq $expected_num ]; then
+		return 0
+	fi
+
+	return 1
+}
+
+ipv6_route_add_prefix()
+{
+	RET=0
+
+	# Add a prefix route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "prefix route not offloaded"
+
+	# Append an identical prefix route with an higher metric and check that
+	# offload indication did not change.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend an identical prefix route with lower metric and check that
+	# it is offloaded and the others are not.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Delete the routes and add the same route with a different nexthop
+	# device. Check that it is offloaded.
+	ip -6 route flush 2001:db8:3::/64 dev $spine_p1
+	ip -6 route add 2001:db8:3::/64 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1
+
+	log_test "IPv6 prefix route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_add_mpath()
+{
+	RET=0
+
+	# Add a multipath route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded when should"
+
+	# Append another nexthop and check that it is offloaded as well.
+	ip -6 route append 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 3
+	check_err $? "appended nexthop not offloaded when should"
+
+	# Mimic route replace by removing the route and adding it back with
+	# only two nexthops.
+	ip -6 route del 2001:db8:3::/64
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after delete & add"
+
+	# Append a nexthop with an higher metric and check that the offload
+	# indication did not change.
+	ip -6 route append 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend a nexthop with a lower metric and check that it is offloaded
+	# and the others are not.
+	ip -6 route append 2001:db8:3::/64 metric 10 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	log_test "IPv6 multipath route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_add()
+{
+	ipv6_route_add_prefix
+	ipv6_route_add_mpath
+}
+
+ipv6_route_replace()
+{
+	RET=0
+
+	# Replace prefix route with prefix route.
+	ip -6 route add 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded when should"
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded after replace"
+
+	# Replace prefix route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replace"
+
+	# Replace multipath route with prefix route. A prefix route cannot
+	# replace a multipath route, so it is appended.
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100 dev $spine_p1" 0
+	check_err $? "prefix route offloaded after 'replacing' multipath route"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after being 'replaced' by prefix route"
+
+	# Replace multipath route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replacing multipath route"
+
+	# Replace a non-existing multipath route with a multipath route and
+	# check that it is appended and not offloaded.
+	ip -6 route replace 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after non-existing route was 'replaced'"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "multipath route offloaded after 'replacing' non-existing route"
+
+	log_test "IPv6 route replace"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+ipv6_route_nexthop_group_share()
+{
+	RET=0
+
+	# The driver consolidates identical nexthop groups in order to reduce
+	# the resource usage in its adjacency table. Check that the deletion
+	# of one multipath route using the group does not affect the other.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ip -6 route add 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ip -6 route del 2001:db8:3::/64
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group"
+
+	# Check that after unsharing a nexthop group the routes are still
+	# marked as offloaded.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ip -6 route del 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1
+	ipv6_offload_check "2001:db8:4::/64" 1
+	check_err $? "singlepath route not offloaded after unsharing the nexthop group"
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded after unsharing the nexthop group"
+
+	log_test "IPv6 nexthop group sharing"
+
+	ip -6 route flush 2001:db8:3::/64
+	ip -6 route flush 2001:db8:4::/64
+}
+
+ipv6_route_rate()
+{
+	local batch_dir=$(mktemp -d)
+	local num_rts=$((40 * 1024))
+	local num_nhs=16
+	local total
+	local start
+	local diff
+	local end
+	local nhs
+	local i
+
+	RET=0
+
+	# Prepare 40K /64 multipath routes with 16 nexthops each and check how
+	# long it takes to add them. A limit of 60 seconds is set. It is much
+	# higher than insertion should take and meant to flag a serious
+	# regression.
+	total=$((nums_nhs * num_rts))
+
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1
+		nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1"
+	done
+
+	for i in $(seq 1 $num_rts); do
+		echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/add.batch
+		echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/del.batch
+	done
+
+	start=$(date +%s.%N)
+
+	ip -batch $batch_dir/add.batch
+	count=$(ip -6 route show | grep offload | wc -l)
+	while [ $count -lt $total ]; do
+		sleep .01
+		count=$(ip -6 route show | grep offload | wc -l)
+	done
+
+	end=$(date +%s.%N)
+
+	diff=$(echo "$end - $start" | bc -l)
+	test "$(echo "$diff > 60" | bc -l)" -eq 0
+	check_err $? "route insertion took too long"
+	log_info "inserted $num_rts routes in $diff seconds"
+
+	log_test "IPv6 routes insertion rate"
+
+	ip -batch $batch_dir/del.batch
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1
+	done
+	rm -rf $batch_dir
+}
+
+setup_prepare()
+{
+	spine_p1=${NETIFS[p1]}
+	tor1_p1=${NETIFS[p2]}
+
+	spine_p2=${NETIFS[p3]}
+	tor2_p1=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	tor1_create
+	tor2_create
+	spine_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	spine_destroy
+	tor2_destroy
+	tor1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-59-g8ed1b


From a324aae32fa9bfdd03e89078e20ebcbd7737fda5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 17 Jun 2019 15:48:58 -0700
Subject: libbpf: constify getter APIs

Add const qualifiers to bpf_object/bpf_program/bpf_map arguments for
getter APIs. There is no need for them to not be const pointers.

Verified that

make -C tools/lib/bpf
make -C tools/testing/selftests/bpf
make -C tools/perf

all build without warnings.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 80 ++++++++++++++++++++++++++------------------------
 tools/lib/bpf/libbpf.h | 62 +++++++++++++++++++-------------------
 2 files changed, 72 insertions(+), 70 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 585e3a2f1eb4..8ce3beba8551 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1663,7 +1663,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
 }
 
 struct bpf_program *
-bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+				  const char *title)
 {
 	struct bpf_program *pos;
 
@@ -2589,8 +2590,8 @@ out:
 	return err;
 }
 
-static bool bpf_program__is_function_storage(struct bpf_program *prog,
-					     struct bpf_object *obj)
+static bool bpf_program__is_function_storage(const struct bpf_program *prog,
+					     const struct bpf_object *obj)
 {
 	return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
 }
@@ -3296,17 +3297,17 @@ bpf_object__next(struct bpf_object *prev)
 	return next;
 }
 
-const char *bpf_object__name(struct bpf_object *obj)
+const char *bpf_object__name(const struct bpf_object *obj)
 {
 	return obj ? obj->path : ERR_PTR(-EINVAL);
 }
 
-unsigned int bpf_object__kversion(struct bpf_object *obj)
+unsigned int bpf_object__kversion(const struct bpf_object *obj)
 {
 	return obj ? obj->kern_version : 0;
 }
 
-struct btf *bpf_object__btf(struct bpf_object *obj)
+struct btf *bpf_object__btf(const struct bpf_object *obj)
 {
 	return obj ? obj->btf : NULL;
 }
@@ -3327,13 +3328,14 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 	return 0;
 }
 
-void *bpf_object__priv(struct bpf_object *obj)
+void *bpf_object__priv(const struct bpf_object *obj)
 {
 	return obj ? obj->priv : ERR_PTR(-EINVAL);
 }
 
 static struct bpf_program *
-__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward)
+__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
+		    bool forward)
 {
 	size_t nr_programs = obj->nr_programs;
 	ssize_t idx;
@@ -3358,7 +3360,7 @@ __bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward)
 }
 
 struct bpf_program *
-bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
 {
 	struct bpf_program *prog = prev;
 
@@ -3370,7 +3372,7 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
 }
 
 struct bpf_program *
-bpf_program__prev(struct bpf_program *next, struct bpf_object *obj)
+bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
 {
 	struct bpf_program *prog = next;
 
@@ -3392,7 +3394,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 	return 0;
 }
 
-void *bpf_program__priv(struct bpf_program *prog)
+void *bpf_program__priv(const struct bpf_program *prog)
 {
 	return prog ? prog->priv : ERR_PTR(-EINVAL);
 }
@@ -3402,7 +3404,7 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
 	prog->prog_ifindex = ifindex;
 }
 
-const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
 {
 	const char *title;
 
@@ -3418,7 +3420,7 @@ const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
 	return title;
 }
 
-int bpf_program__fd(struct bpf_program *prog)
+int bpf_program__fd(const struct bpf_program *prog)
 {
 	return bpf_program__nth_fd(prog, 0);
 }
@@ -3451,7 +3453,7 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
 	return 0;
 }
 
-int bpf_program__nth_fd(struct bpf_program *prog, int n)
+int bpf_program__nth_fd(const struct bpf_program *prog, int n)
 {
 	int fd;
 
@@ -3479,25 +3481,25 @@ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 	prog->type = type;
 }
 
-static bool bpf_program__is_type(struct bpf_program *prog,
+static bool bpf_program__is_type(const struct bpf_program *prog,
 				 enum bpf_prog_type type)
 {
 	return prog ? (prog->type == type) : false;
 }
 
-#define BPF_PROG_TYPE_FNS(NAME, TYPE)			\
-int bpf_program__set_##NAME(struct bpf_program *prog)	\
-{							\
-	if (!prog)					\
-		return -EINVAL;				\
-	bpf_program__set_type(prog, TYPE);		\
-	return 0;					\
-}							\
-							\
-bool bpf_program__is_##NAME(struct bpf_program *prog)	\
-{							\
-	return bpf_program__is_type(prog, TYPE);	\
-}							\
+#define BPF_PROG_TYPE_FNS(NAME, TYPE)				\
+int bpf_program__set_##NAME(struct bpf_program *prog)		\
+{								\
+	if (!prog)						\
+		return -EINVAL;					\
+	bpf_program__set_type(prog, TYPE);			\
+	return 0;						\
+}								\
+								\
+bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
+{								\
+	return bpf_program__is_type(prog, TYPE);		\
+}								\
 
 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
@@ -3692,17 +3694,17 @@ bpf_program__identify_section(struct bpf_program *prog,
 					expected_attach_type);
 }
 
-int bpf_map__fd(struct bpf_map *map)
+int bpf_map__fd(const struct bpf_map *map)
 {
 	return map ? map->fd : -EINVAL;
 }
 
-const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
 {
 	return map ? &map->def : ERR_PTR(-EINVAL);
 }
 
-const char *bpf_map__name(struct bpf_map *map)
+const char *bpf_map__name(const struct bpf_map *map)
 {
 	return map ? map->name : NULL;
 }
@@ -3733,17 +3735,17 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
 	return 0;
 }
 
-void *bpf_map__priv(struct bpf_map *map)
+void *bpf_map__priv(const struct bpf_map *map)
 {
 	return map ? map->priv : ERR_PTR(-EINVAL);
 }
 
-bool bpf_map__is_offload_neutral(struct bpf_map *map)
+bool bpf_map__is_offload_neutral(const struct bpf_map *map)
 {
 	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
 }
 
-bool bpf_map__is_internal(struct bpf_map *map)
+bool bpf_map__is_internal(const struct bpf_map *map)
 {
 	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 }
@@ -3768,7 +3770,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 }
 
 static struct bpf_map *
-__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)
+__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 {
 	ssize_t idx;
 	struct bpf_map *s, *e;
@@ -3792,7 +3794,7 @@ __bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i)
 }
 
 struct bpf_map *
-bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
+bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
 {
 	if (prev == NULL)
 		return obj->maps;
@@ -3801,7 +3803,7 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
 }
 
 struct bpf_map *
-bpf_map__prev(struct bpf_map *next, struct bpf_object *obj)
+bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
 {
 	if (next == NULL) {
 		if (!obj->nr_maps)
@@ -3813,7 +3815,7 @@ bpf_map__prev(struct bpf_map *next, struct bpf_object *obj)
 }
 
 struct bpf_map *
-bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
 {
 	struct bpf_map *pos;
 
@@ -3825,7 +3827,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
 }
 
 int
-bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name)
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 {
 	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2e594a0fa961..d639f47e3110 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -98,15 +98,16 @@ struct bpf_object_load_attr {
 LIBBPF_API int bpf_object__load(struct bpf_object *obj);
 LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
 LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
-LIBBPF_API const char *bpf_object__name(struct bpf_object *obj);
-LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj);
+LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
+LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
 
 struct btf;
-LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj);
+LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
 LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
 
 LIBBPF_API struct bpf_program *
-bpf_object__find_program_by_title(struct bpf_object *obj, const char *title);
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+				  const char *title);
 
 LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
 #define bpf_object__for_each_safe(pos, tmp)			\
@@ -118,7 +119,7 @@ LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
 typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
 LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 				    bpf_object_clear_priv_t clear_priv);
-LIBBPF_API void *bpf_object__priv(struct bpf_object *prog);
+LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
 
 LIBBPF_API int
 libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
@@ -129,7 +130,7 @@ LIBBPF_API int libbpf_attach_type_by_name(const char *name,
 /* Accessors of bpf_program */
 struct bpf_program;
 LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
-						 struct bpf_object *obj);
+						 const struct bpf_object *obj);
 
 #define bpf_object__for_each_program(pos, obj)		\
 	for ((pos) = bpf_program__next(NULL, (obj));	\
@@ -137,24 +138,23 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
 	     (pos) = bpf_program__next((pos), (obj)))
 
 LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
-						 struct bpf_object *obj);
+						 const struct bpf_object *obj);
 
-typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
-					 void *);
+typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
 
 LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 				     bpf_program_clear_priv_t clear_priv);
 
-LIBBPF_API void *bpf_program__priv(struct bpf_program *prog);
+LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
 					 __u32 ifindex);
 
-LIBBPF_API const char *bpf_program__title(struct bpf_program *prog,
+LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
 					  bool needs_copy);
 
 LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
 				 __u32 kern_version);
-LIBBPF_API int bpf_program__fd(struct bpf_program *prog);
+LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
 LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
 					 const char *path,
 					 int instance);
@@ -227,7 +227,7 @@ typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
 LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
 				     bpf_program_prep_t prep);
 
-LIBBPF_API int bpf_program__nth_fd(struct bpf_program *prog, int n);
+LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
 
 /*
  * Adjust type of BPF program. Default is kprobe.
@@ -246,14 +246,14 @@ LIBBPF_API void
 bpf_program__set_expected_attach_type(struct bpf_program *prog,
 				      enum bpf_attach_type type);
 
-LIBBPF_API bool bpf_program__is_socket_filter(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_tracepoint(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_kprobe(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_sched_cls(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_sched_act(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_xdp(struct bpf_program *prog);
-LIBBPF_API bool bpf_program__is_perf_event(struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
 
 /*
  * No need for __attribute__((packed)), all members of 'bpf_map_def'
@@ -275,10 +275,10 @@ struct bpf_map_def {
  */
 struct bpf_map;
 LIBBPF_API struct bpf_map *
-bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
 
 LIBBPF_API int
-bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name);
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
 
 /*
  * Get bpf_map through the offset of corresponding struct bpf_map_def
@@ -288,7 +288,7 @@ LIBBPF_API struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
 
 LIBBPF_API struct bpf_map *
-bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
+bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
 #define bpf_object__for_each_map(pos, obj)		\
 	for ((pos) = bpf_map__next(NULL, (obj));	\
 	     (pos) != NULL;				\
@@ -296,22 +296,22 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
 #define bpf_map__for_each bpf_object__for_each_map
 
 LIBBPF_API struct bpf_map *
-bpf_map__prev(struct bpf_map *map, struct bpf_object *obj);
+bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
 
-LIBBPF_API int bpf_map__fd(struct bpf_map *map);
-LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
-LIBBPF_API const char *bpf_map__name(struct bpf_map *map);
+LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
 LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
 
 typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
 LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
 				 bpf_map_clear_priv_t clear_priv);
-LIBBPF_API void *bpf_map__priv(struct bpf_map *map);
+LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
 LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
-LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map);
-LIBBPF_API bool bpf_map__is_internal(struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
-- 
cgit v1.2.3-59-g8ed1b


From fc559a70d57c6ee5443f7a750858503e94cdc941 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sat, 15 Jun 2019 12:12:18 -0700
Subject: selftests/bpf: fix tests due to const spill/fill

fix tests that incorrectly assumed that the verifier
cannot track constants through stack.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/verifier/direct_packet_access.c  |  3 ++-
 .../selftests/bpf/verifier/helper_access_var_len.c | 28 ++++++++++++----------
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index d5c596fdc4b9..2c5fbe7bcd27 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -511,7 +511,8 @@
 		    offsetof(struct __sk_buff, data)),
 	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
 		    offsetof(struct __sk_buff, data_end)),
-	BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, mark)),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 1f39d845c64f..67ab12410050 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -29,9 +29,9 @@
 {
 	"helper access to variable memory: stack, bitwise AND, zero included",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -46,9 +46,9 @@
 {
 	"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
@@ -122,9 +122,9 @@
 {
 	"helper access to variable memory: stack, JMP, bounds + offset",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
@@ -143,9 +143,9 @@
 {
 	"helper access to variable memory: stack, JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
@@ -163,9 +163,9 @@
 {
 	"helper access to variable memory: stack, JMP, no max check",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_MOV64_IMM(BPF_REG_4, 0),
@@ -183,9 +183,9 @@
 {
 	"helper access to variable memory: stack, JMP, no min check",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
@@ -201,9 +201,9 @@
 {
 	"helper access to variable memory: stack, JMP (signed), no min check",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
-	BPF_MOV64_IMM(BPF_REG_2, 16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
@@ -244,6 +244,7 @@
 {
 	"helper access to variable memory: map, JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -251,7 +252,7 @@
 	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4),
@@ -262,7 +263,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_48b = { 3 },
+	.fixup_map_hash_48b = { 4 },
 	.errstr = "invalid access to map value, value_size=48 off=0 size=49",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -296,6 +297,7 @@
 {
 	"helper access to variable memory: map adjusted, JMP, wrong max",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
@@ -304,7 +306,7 @@
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
-	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4),
@@ -315,7 +317,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.fixup_map_hash_48b = { 3 },
+	.fixup_map_hash_48b = { 4 },
 	.errstr = "R1 min value is outside of the array range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -337,8 +339,8 @@
 {
 	"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
 	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
 	BPF_MOV64_IMM(BPF_REG_1, 0),
-	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@ -562,6 +564,7 @@
 {
 	"helper access to variable memory: 8 bytes leak",
 	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -572,7 +575,6 @@
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
-	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
-- 
cgit v1.2.3-59-g8ed1b


From aeee380ccfc59c05a4c6e8e762fd645c6bdc773a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sat, 15 Jun 2019 12:12:22 -0700
Subject: selftests/bpf: fix tests

Fix tests that assumed no loops.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c  | 11 ++++-------
 tools/testing/selftests/bpf/verifier/calls.c | 22 +++++++++++++---------
 tools/testing/selftests/bpf/verifier/cfg.c   | 11 +++++++----
 3 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index cd0248c54e25..93e1d87a343a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -237,10 +237,10 @@ static void bpf_fill_scale1(struct bpf_test *self)
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % 64 + 1));
 	}
-	/* every jump adds 1 step to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
+	/* is_state_visited() doesn't allocate state for pruning for every jump.
+	 * Hence multiply jmps by 4 to accommodate that heuristic
 	 */
-	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
 		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
@@ -269,10 +269,7 @@ static void bpf_fill_scale2(struct bpf_test *self)
 		insn[i++] = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6,
 					-8 * (k % (64 - 4 * FUNC_NEST) + 1));
 	}
-	/* every jump adds 1 step to insn_processed, so to stay exactly
-	 * within 1m limit add MAX_TEST_INSNS - MAX_JMP_SEQ - 1 MOVs and 1 EXIT
-	 */
-	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ - 1)
+	while (i < MAX_TEST_INSNS - MAX_JMP_SEQ * 4)
 		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 42);
 	insn[i] = BPF_EXIT_INSN();
 	self->prog_len = i + 1;
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 9093a8f64dc6..2d752c4f8d9d 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -215,9 +215,11 @@
 	BPF_MOV64_IMM(BPF_REG_0, 3),
 	BPF_JMP_IMM(BPF_JA, 0, 0, -6),
 	},
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge from insn",
-	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+	.errstr_unpriv = "back-edge from insn",
+	.result_unpriv = REJECT,
+	.result = ACCEPT,
+	.retval = 1,
 },
 {
 	"calls: conditional call 4",
@@ -250,22 +252,24 @@
 	BPF_MOV64_IMM(BPF_REG_0, 3),
 	BPF_EXIT_INSN(),
 	},
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge from insn",
-	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.retval = 1,
 },
 {
 	"calls: conditional call 6",
 	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
 	BPF_EXIT_INSN(),
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, mark)),
 	BPF_EXIT_INSN(),
 	},
-	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge from insn",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.errstr = "infinite loop detected",
 	.result = REJECT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c
index 349c0862fb4c..4eb76ed739ce 100644
--- a/tools/testing/selftests/bpf/verifier/cfg.c
+++ b/tools/testing/selftests/bpf/verifier/cfg.c
@@ -41,7 +41,8 @@
 	BPF_JMP_IMM(BPF_JA, 0, 0, -1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "back-edge",
+	.errstr = "unreachable insn 1",
+	.errstr_unpriv = "back-edge",
 	.result = REJECT,
 },
 {
@@ -53,18 +54,20 @@
 	BPF_JMP_IMM(BPF_JA, 0, 0, -4),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "back-edge",
+	.errstr = "unreachable insn 4",
+	.errstr_unpriv = "back-edge",
 	.result = REJECT,
 },
 {
 	"conditional loop",
 	.insns = {
-	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "back-edge",
+	.errstr = "infinite loop detected",
+	.errstr_unpriv = "back-edge",
 	.result = REJECT,
 },
-- 
cgit v1.2.3-59-g8ed1b


From 0d3679e99ae4b7868da22e3b8540fd597df501f5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sat, 15 Jun 2019 12:12:23 -0700
Subject: selftests/bpf: add basic verifier tests for loops

This set of tests is a rewrite of Edward's earlier tests:
https://patchwork.ozlabs.org/patch/877221/

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/verifier/loops1.c | 161 ++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/verifier/loops1.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c
new file mode 100644
index 000000000000..5e980a5ab69d
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/loops1.c
@@ -0,0 +1,161 @@
+{
+	"bounded loop, count to 4",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop, count to 20",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 20, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded loop, count from positive unknown to 4",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop, count from totally unknown to 4",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded loop, count to 4 with equality",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, -2),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded loop, start in the middle",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_JMP_A(1),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+		BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "back-edge",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop containing a forward jump",
+	.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+		BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_0, 0),
+		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -3),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.retval = 4,
+},
+{
+	"bounded loop that jumps out rather than in",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 10000, 2),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_JMP_A(-4),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"infinite loop after a conditional jump",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 5),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, 2),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_JMP_A(-2),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "program is too large",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"bounded recursion",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "back-edge",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"infinite loop in two jumps",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_JMP_A(0),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "loop detected",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"infinite loop: three-jump trick",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, -11),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "loop detected",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
-- 
cgit v1.2.3-59-g8ed1b


From b061017f8b4d0e05d4c11486581a702fb2a975b2 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Sat, 15 Jun 2019 12:12:24 -0700
Subject: selftests/bpf: add realistic loop tests

Add a bunch of loop tests. Most of them are created by replacing
'#pragma unroll' with '#pragma clang loop unroll(disable)'

Several tests are artificially large:
  /* partial unroll. llvm will unroll loop ~150 times.
   * C loop count -> 600.
   * Asm loop count -> 4.
   * 16k insns in loop body.
   * Total of 5 such loops. Total program size ~82k insns.
   */
  "./pyperf600.o",

  /* no unroll at all.
   * C loop count -> 600.
   * ASM loop count -> 600.
   * ~110 insns in loop body.
   * Total of 5 such loops. Total program size ~1500 insns.
   */
  "./pyperf600_nounroll.o",

  /* partial unroll. 19k insn in a loop.
   * Total program size 20.8k insn.
   * ~350k processed_insns
   */
  "./strobemeta.o",

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/prog_tests/bpf_verif_scale.c     |  67 ++-
 tools/testing/selftests/bpf/progs/loop1.c          |  28 ++
 tools/testing/selftests/bpf/progs/loop2.c          |  28 ++
 tools/testing/selftests/bpf/progs/loop3.c          |  22 +
 tools/testing/selftests/bpf/progs/pyperf.h         |   6 +-
 tools/testing/selftests/bpf/progs/pyperf600.c      |   9 +
 .../selftests/bpf/progs/pyperf600_nounroll.c       |   8 +
 tools/testing/selftests/bpf/progs/strobemeta.c     |  10 +
 tools/testing/selftests/bpf/progs/strobemeta.h     | 528 +++++++++++++++++++++
 .../selftests/bpf/progs/strobemeta_nounroll1.c     |   9 +
 .../selftests/bpf/progs/strobemeta_nounroll2.c     |   9 +
 tools/testing/selftests/bpf/progs/test_seg6_loop.c | 261 ++++++++++
 .../selftests/bpf/progs/test_sysctl_loop1.c        |  71 +++
 .../selftests/bpf/progs/test_sysctl_loop2.c        |  72 +++
 tools/testing/selftests/bpf/progs/test_xdp_loop.c  | 231 +++++++++
 15 files changed, 1347 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/loop1.c
 create mode 100644 tools/testing/selftests/bpf/progs/loop2.c
 create mode 100644 tools/testing/selftests/bpf/progs/loop3.c
 create mode 100644 tools/testing/selftests/bpf/progs/pyperf600.c
 create mode 100644 tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
 create mode 100644 tools/testing/selftests/bpf/progs/strobemeta.c
 create mode 100644 tools/testing/selftests/bpf/progs/strobemeta.h
 create mode 100644 tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
 create mode 100644 tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_seg6_loop.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_loop.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index c0091137074b..e1b55261526f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -5,7 +5,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
 			      const char *format, va_list args)
 {
 	if (level != LIBBPF_DEBUG)
-		return 0;
+		return vfprintf(stderr, format, args);
 
 	if (!strstr(format, "verifier log"))
 		return 0;
@@ -32,24 +32,69 @@ static int check_load(const char *file, enum bpf_prog_type type)
 
 void test_bpf_verif_scale(void)
 {
-	const char *scale[] = {
-		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o"
+	const char *sched_cls[] = {
+		"./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o",
 	};
-	const char *pyperf[] = {
-		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o"
+	const char *raw_tp[] = {
+		/* full unroll by llvm */
+		"./pyperf50.o",	"./pyperf100.o", "./pyperf180.o",
+
+		/* partial unroll. llvm will unroll loop ~150 times.
+		 * C loop count -> 600.
+		 * Asm loop count -> 4.
+		 * 16k insns in loop body.
+		 * Total of 5 such loops. Total program size ~82k insns.
+		 */
+		"./pyperf600.o",
+
+		/* no unroll at all.
+		 * C loop count -> 600.
+		 * ASM loop count -> 600.
+		 * ~110 insns in loop body.
+		 * Total of 5 such loops. Total program size ~1500 insns.
+		 */
+		"./pyperf600_nounroll.o",
+
+		"./loop1.o", "./loop2.o",
+
+		/* partial unroll. 19k insn in a loop.
+		 * Total program size 20.8k insn.
+		 * ~350k processed_insns
+		 */
+		"./strobemeta.o",
+
+		/* no unroll, tiny loops */
+		"./strobemeta_nounroll1.o",
+		"./strobemeta_nounroll2.o",
+	};
+	const char *cg_sysctl[] = {
+		"./test_sysctl_loop1.o", "./test_sysctl_loop2.o",
 	};
 	int err, i;
 
 	if (verifier_stats)
 		libbpf_set_print(libbpf_debug_print);
 
-	for (i = 0; i < ARRAY_SIZE(scale); i++) {
-		err = check_load(scale[i], BPF_PROG_TYPE_SCHED_CLS);
-		printf("test_scale:%s:%s\n", scale[i], err ? "FAIL" : "OK");
+	err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT);
+	printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL");
+
+	for (i = 0; i < ARRAY_SIZE(sched_cls); i++) {
+		err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS);
+		printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK");
 	}
 
-	for (i = 0; i < ARRAY_SIZE(pyperf); i++) {
-		err = check_load(pyperf[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
-		printf("test_scale:%s:%s\n", pyperf[i], err ? "FAIL" : "OK");
+	for (i = 0; i < ARRAY_SIZE(raw_tp); i++) {
+		err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT);
+		printf("test_scale:%s:%s\n", raw_tp[i], err ? "FAIL" : "OK");
 	}
+
+	for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) {
+		err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL);
+		printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK");
+	}
+	err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP);
+	printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK");
+
+	err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL);
+	printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK");
 }
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
new file mode 100644
index 000000000000..dea395af9ea9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/kfree_skb")
+int nested_loops(volatile struct pt_regs* ctx)
+{
+	int i, j, sum = 0, m;
+
+	for (j = 0; j < 300; j++)
+		for (i = 0; i < j; i++) {
+			if (j & 1)
+				m = ctx->rax;
+			else
+				m = j;
+			sum += i * m;
+		}
+
+	return sum;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
new file mode 100644
index 000000000000..0637bd8e8bcf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/consume_skb")
+int while_true(volatile struct pt_regs* ctx)
+{
+	int i = 0;
+
+	while (true) {
+		if (ctx->rax & 1)
+			i += 3;
+		else
+			i += 7;
+		if (i > 40)
+			break;
+	}
+
+	return i;
+}
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
new file mode 100644
index 000000000000..30a0f6cba080
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("raw_tracepoint/consume_skb")
+int while_true(volatile struct pt_regs* ctx)
+{
+	__u64 i = 0, sum = 0;
+	do {
+		i++;
+		sum += ctx->rax;
+	} while (i < 0x100000000ULL);
+	return sum;
+}
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 0cc5e4ee90bd..6b0781391be5 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -220,7 +220,11 @@ static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *
 		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
 		if (symbol_counter == NULL)
 			return 0;
-#pragma unroll
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma clang loop unroll(full)
+#endif
 		/* Unwind python stack */
 		for (int i = 0; i < STACK_MAX_LEN; ++i) {
 			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
new file mode 100644
index 000000000000..cb49b89e37cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 600
+/* clang will not unroll the loop 600 times.
+ * Instead it will unroll it to the amount it deemed
+ * appropriate, but the loop will still execute 600 times.
+ * Total program size is around 90k insns
+ */
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
new file mode 100644
index 000000000000..6beff7502f4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#define STACK_MAX_LEN 600
+#define NO_UNROLL
+/* clang will not unroll at all.
+ * Total program size is around 2k insns
+ */
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.c b/tools/testing/selftests/bpf/progs/strobemeta.c
new file mode 100644
index 000000000000..d3df3d86f092
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+/* full unroll by llvm #undef NO_UNROLL */
+#include "strobemeta.h"
+
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
new file mode 100644
index 000000000000..1ff73f60a3e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include "bpf_helpers.h"
+
+typedef uint32_t pid_t;
+struct task_struct {};
+
+#define TASK_COMM_LEN 16
+#define PERF_MAX_STACK_DEPTH 127
+
+#define STROBE_TYPE_INVALID 0
+#define STROBE_TYPE_INT 1
+#define STROBE_TYPE_STR 2
+#define STROBE_TYPE_MAP 3
+
+#define STACK_TABLE_EPOCH_SHIFT 20
+#define STROBE_MAX_STR_LEN 1
+#define STROBE_MAX_CFGS 32
+#define STROBE_MAX_PAYLOAD						\
+	(STROBE_MAX_STRS * STROBE_MAX_STR_LEN +				\
+	STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+
+struct strobe_value_header {
+	/*
+	 * meaning depends on type:
+	 * 1. int: 0, if value not set, 1 otherwise
+	 * 2. str: 1 always, whether value is set or not is determined by ptr
+	 * 3. map: 1 always, pointer points to additional struct with number
+	 *    of entries (up to STROBE_MAX_MAP_ENTRIES)
+	 */
+	uint16_t len;
+	/*
+	 * _reserved might be used for some future fields/flags, but we always
+	 * want to keep strobe_value_header to be 8 bytes, so BPF can read 16
+	 * bytes in one go and get both header and value
+	 */
+	uint8_t _reserved[6];
+};
+
+/*
+ * strobe_value_generic is used from BPF probe only, but needs to be a union
+ * of strobe_value_int/strobe_value_str/strobe_value_map
+ */
+struct strobe_value_generic {
+	struct strobe_value_header header;
+	union {
+		int64_t val;
+		void *ptr;
+	};
+};
+
+struct strobe_value_int {
+	struct strobe_value_header header;
+	int64_t value;
+};
+
+struct strobe_value_str {
+	struct strobe_value_header header;
+	const char* value;
+};
+
+struct strobe_value_map {
+	struct strobe_value_header header;
+	const struct strobe_map_raw* value;
+};
+
+struct strobe_map_entry {
+	const char* key;
+	const char* val;
+};
+
+/*
+ * Map of C-string key/value pairs with fixed maximum capacity. Each map has
+ * corresponding int64 ID, which application can use (or ignore) in whatever
+ * way appropriate. Map is "write-only", there is no way to get data out of
+ * map. Map is intended to be used to provide metadata for profilers and is
+ * not to be used for internal in-app communication. All methods are
+ * thread-safe.
+ */
+struct strobe_map_raw {
+	/*
+	 * general purpose unique ID that's up to application to decide
+	 * whether and how to use; for request metadata use case id is unique
+	 * request ID that's used to match metadata with stack traces on
+	 * Strobelight backend side
+	 */
+	int64_t id;
+	/* number of used entries in map */
+	int64_t cnt;
+	/*
+	 * having volatile doesn't change anything on BPF side, but clang
+	 * emits warnings for passing `volatile const char *` into
+	 * bpf_probe_read_str that expects just `const char *`
+	 */
+	const char* tag;
+	/*
+	 * key/value entries, each consisting of 2 pointers to key and value
+	 * C strings
+	 */
+	struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
+};
+
+/* Following values define supported values of TLS mode */
+#define TLS_NOT_SET -1
+#define TLS_LOCAL_EXEC 0
+#define TLS_IMM_EXEC 1
+#define TLS_GENERAL_DYN 2
+
+/*
+ * structure that universally represents TLS location (both for static
+ * executables and shared libraries)
+ */
+struct strobe_value_loc {
+	/*
+	 * tls_mode defines what TLS mode was used for particular metavariable:
+	 * - -1 (TLS_NOT_SET) - no metavariable;
+	 * - 0 (TLS_LOCAL_EXEC) - Local Executable mode;
+	 * - 1 (TLS_IMM_EXEC) - Immediate Executable mode;
+	 * - 2 (TLS_GENERAL_DYN) - General Dynamic mode;
+	 * Local Dynamic mode is not yet supported, because never seen in
+	 * practice.  Mode defines how offset field is interpreted. See
+	 * calc_location() in below for details.
+	 */
+	int64_t tls_mode;
+	/*
+	 * TLS_LOCAL_EXEC: offset from thread pointer (fs:0 for x86-64,
+	 * tpidr_el0 for aarch64).
+	 * TLS_IMM_EXEC: absolute address of GOT entry containing offset
+	 * from thread pointer;
+	 * TLS_GENERAL_DYN: absolute addres of double GOT entry
+	 * containing tls_index_t struct;
+	 */
+	int64_t offset;
+};
+
+struct strobemeta_cfg {
+	int64_t req_meta_idx;
+	struct strobe_value_loc int_locs[STROBE_MAX_INTS];
+	struct strobe_value_loc str_locs[STROBE_MAX_STRS];
+	struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
+};
+
+struct strobe_map_descr {
+	uint64_t id;
+	int16_t tag_len;
+	/*
+	 * cnt <0 - map value isn't set;
+	 * 0 - map has id set, but no key/value entries
+	 */
+	int16_t cnt;
+	/*
+	 * both key_lens[i] and val_lens[i] should be >0 for present key/value
+	 * entry
+	 */
+	uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
+	uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
+};
+
+struct strobemeta_payload {
+	/* req_id has valid request ID, if req_meta_valid == 1 */
+	int64_t req_id;
+	uint8_t req_meta_valid;
+	/*
+	 * mask has Nth bit set to 1, if Nth metavar was present and
+	 * successfully read
+	 */
+	uint64_t int_vals_set_mask;
+	int64_t int_vals[STROBE_MAX_INTS];
+	/* len is >0 for present values */
+	uint16_t str_lens[STROBE_MAX_STRS];
+	/* if map_descrs[i].cnt == -1, metavar is not present/set */
+	struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
+	/*
+	 * payload has compactly packed values of str and map variables in the
+	 * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
+	 * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
+	 * value length
+	 */
+	char payload[STROBE_MAX_PAYLOAD];
+};
+
+struct strobelight_bpf_sample {
+	uint64_t ktime;
+	char comm[TASK_COMM_LEN];
+	pid_t pid;
+	int user_stack_id;
+	int kernel_stack_id;
+	int has_meta;
+	struct strobemeta_payload metadata;
+	/*
+	 * makes it possible to pass (<real payload size> + 1) as data size to
+	 * perf_submit() to avoid perf_submit's paranoia about passing zero as
+	 * size, as it deduces that <real payload size> might be
+	 * **theoretically** zero
+	 */
+	char dummy_safeguard;
+};
+
+struct bpf_map_def SEC("maps") samples = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 32,
+};
+
+struct bpf_map_def SEC("maps") stacks_0 = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.key_size = sizeof(uint32_t),
+	.value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
+	.max_entries = 16,
+};
+
+struct bpf_map_def SEC("maps") stacks_1 = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.key_size = sizeof(uint32_t),
+	.value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
+	.max_entries = 16,
+};
+
+struct bpf_map_def SEC("maps") sample_heap = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(uint32_t),
+	.value_size = sizeof(struct strobelight_bpf_sample),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") strobemeta_cfgs = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(pid_t),
+	.value_size = sizeof(struct strobemeta_cfg),
+	.max_entries = STROBE_MAX_CFGS,
+};
+
+/* Type for the dtv.  */
+/* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
+typedef union dtv {
+	size_t counter;
+	struct {
+		void* val;
+		bool is_static;
+	} pointer;
+} dtv_t;
+
+/* Partial definition for tcbhead_t */
+/* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
+struct tcbhead {
+	void* tcb;
+	dtv_t* dtv;
+};
+
+/*
+ * TLS module/offset information for shared library case.
+ * For x86-64, this is mapped onto two entries in GOT.
+ * For aarch64, this is pointed to by second GOT entry.
+ */
+struct tls_index {
+	uint64_t module;
+	uint64_t offset;
+};
+
+static inline __attribute__((always_inline))
+void *calc_location(struct strobe_value_loc *loc, void *tls_base)
+{
+	/*
+	 * tls_mode value is:
+	 * - -1 (TLS_NOT_SET), if no metavar is present;
+	 * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
+	 * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
+	 * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
+	 * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
+	 * This schema allows to use something like:
+	 * (tls_mode + 1) * (tls_base + offset)
+	 * to get NULL for "no metavar" location, or correct pointer for local
+	 * executable mode without doing extra ifs.
+	 */
+	if (loc->tls_mode <= TLS_LOCAL_EXEC) {
+		/* static executable is simple, we just have offset from
+		 * tls_base */
+		void *addr = tls_base + loc->offset;
+		/* multiply by (tls_mode + 1) to get NULL, if we have no
+		 * metavar in this slot */
+		return (void *)((loc->tls_mode + 1) * (int64_t)addr);
+	}
+	/*
+	 * Other modes are more complicated, we need to jump through few hoops.
+	 *
+	 * For immediate executable mode (currently supported only for aarch64):
+	 *  - loc->offset is pointing to a GOT entry containing fixed offset
+	 *  relative to tls_base;
+	 *
+	 * For general dynamic mode:
+	 *  - loc->offset is pointing to a beginning of double GOT entries;
+	 *  - (for aarch64 only) second entry points to tls_index_t struct;
+	 *  - (for x86-64 only) two GOT entries are already tls_index_t;
+	 *  - tls_index_t->module is used to find start of TLS section in
+	 *  which variable resides;
+	 *  - tls_index_t->offset provides offset within that TLS section,
+	 *  pointing to value of variable.
+	 */
+	struct tls_index tls_index;
+	dtv_t *dtv;
+	void *tls_ptr;
+
+	bpf_probe_read(&tls_index, sizeof(struct tls_index),
+		       (void *)loc->offset);
+	/* valid module index is always positive */
+	if (tls_index.module > 0) {
+		/* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
+		bpf_probe_read(&dtv, sizeof(dtv),
+			       &((struct tcbhead *)tls_base)->dtv);
+		dtv += tls_index.module;
+	} else {
+		dtv = NULL;
+	}
+	bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
+	/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
+	return tls_ptr && tls_ptr != (void *)-1
+		? tls_ptr + tls_index.offset
+		: NULL;
+}
+
+static inline __attribute__((always_inline))
+void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
+		  struct strobe_value_generic *value,
+		  struct strobemeta_payload *data)
+{
+	void *location = calc_location(&cfg->int_locs[idx], tls_base);
+	if (!location)
+		return;
+
+	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+	data->int_vals[idx] = value->val;
+	if (value->header.len)
+		data->int_vals_set_mask |= (1 << idx);
+}
+
+static inline __attribute__((always_inline))
+uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base,
+		      struct strobe_value_generic *value,
+		      struct strobemeta_payload *data, void *payload)
+{
+	void *location;
+	uint32_t len;
+
+	data->str_lens[idx] = 0;
+	location = calc_location(&cfg->str_locs[idx], tls_base);
+	if (!location)
+		return 0;
+
+	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+	/*
+	 * if bpf_probe_read_str returns error (<0), due to casting to
+	 * unsinged int, it will become big number, so next check is
+	 * sufficient to check for errors AND prove to BPF verifier, that
+	 * bpf_probe_read_str won't return anything bigger than
+	 * STROBE_MAX_STR_LEN
+	 */
+	if (len > STROBE_MAX_STR_LEN)
+		return 0;
+
+	data->str_lens[idx] = len;
+	return len;
+}
+
+static inline __attribute__((always_inline))
+void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
+		   struct strobe_value_generic *value,
+		   struct strobemeta_payload* data, void *payload)
+{
+	struct strobe_map_descr* descr = &data->map_descrs[idx];
+	struct strobe_map_raw map;
+	void *location;
+	uint32_t len;
+	int i;
+
+	descr->tag_len = 0; /* presume no tag is set */
+	descr->cnt = -1; /* presume no value is set */
+
+	location = calc_location(&cfg->map_locs[idx], tls_base);
+	if (!location)
+		return payload;
+
+	bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
+	if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
+		return payload;
+
+	descr->id = map.id;
+	descr->cnt = map.cnt;
+	if (cfg->req_meta_idx == idx) {
+		data->req_id = map.id;
+		data->req_meta_valid = 1;
+	}
+
+	len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
+	if (len <= STROBE_MAX_STR_LEN) {
+		descr->tag_len = len;
+		payload += len;
+	}
+
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) {
+		descr->key_lens[i] = 0;
+		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
+					 map.entries[i].key);
+		if (len <= STROBE_MAX_STR_LEN) {
+			descr->key_lens[i] = len;
+			payload += len;
+		}
+		descr->val_lens[i] = 0;
+		len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
+					 map.entries[i].val);
+		if (len <= STROBE_MAX_STR_LEN) {
+			descr->val_lens[i] = len;
+			payload += len;
+		}
+	}
+
+	return payload;
+}
+
+/*
+ * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
+ * pointer to *right after* payload ends
+ */
+static inline __attribute__((always_inline))
+void *read_strobe_meta(struct task_struct* task,
+		       struct strobemeta_payload* data) {
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+	struct strobe_value_generic value = {0};
+	struct strobemeta_cfg *cfg;
+	void *tls_base, *payload;
+
+	cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
+	if (!cfg)
+		return NULL;
+
+	data->int_vals_set_mask = 0;
+	data->req_meta_valid = 0;
+	payload = data->payload;
+	/*
+	 * we don't have struct task_struct definition, it should be:
+	 * tls_base = (void *)task->thread.fsbase;
+	 */
+	tls_base = (void *)task;
+
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_INTS; ++i) {
+		read_int_var(cfg, i, tls_base, &value, data);
+	}
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
+		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+	}
+#ifdef NO_UNROLL
+#pragma clang loop unroll(disable)
+#else
+#pragma unroll
+#endif
+	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
+		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+	}
+	/*
+	 * return pointer right after end of payload, so it's possible to
+	 * calculate exact amount of useful data that needs to be sent
+	 */
+	return payload;
+}
+
+SEC("raw_tracepoint/kfree_skb")
+int on_event(struct pt_regs *ctx) {
+	pid_t pid =  bpf_get_current_pid_tgid() >> 32;
+	struct strobelight_bpf_sample* sample;
+	struct task_struct *task;
+	uint32_t zero = 0;
+	uint64_t ktime_ns;
+	void *sample_end;
+
+	sample = bpf_map_lookup_elem(&sample_heap, &zero);
+	if (!sample)
+		return 0; /* this will never happen */
+
+	sample->pid = pid;
+	bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
+	ktime_ns = bpf_ktime_get_ns();
+	sample->ktime = ktime_ns;
+
+	task = (struct task_struct *)bpf_get_current_task();
+	sample_end = read_strobe_meta(task, &sample->metadata);
+	sample->has_meta = sample_end != NULL;
+	sample_end = sample_end ? : &sample->metadata;
+
+	if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
+		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
+		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
+	} else {
+		sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
+		sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
+	}
+
+	uint64_t sample_size = sample_end - (void *)sample;
+	/* should always be true */
+	if (sample_size < sizeof(struct strobelight_bpf_sample))
+		bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
new file mode 100644
index 000000000000..f0a1669e11d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll1.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
new file mode 100644
index 000000000000..4291a7d642e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_nounroll2.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 30
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
new file mode 100644
index 000000000000..463964d79f73
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -0,0 +1,261 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+	({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+				0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+				0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+	unsigned int ver:4;
+	unsigned int priority:8;
+	unsigned int flow_label:20;
+	unsigned short payload_len;
+	unsigned char next_header;
+	unsigned char hop_limit;
+	unsigned long long src_hi;
+	unsigned long long src_lo;
+	unsigned long long dst_hi;
+	unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+	unsigned long long hi;
+	unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+	unsigned char nexthdr;
+	unsigned char hdrlen;
+	unsigned char type;
+	unsigned char segments_left;
+	unsigned char first_segment;
+	unsigned char flags;
+	unsigned short tag;
+
+	struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+	unsigned char type;
+	unsigned char len;
+	unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+	void *cursor, *data_end;
+	struct ip6_srh_t *srh;
+	struct ip6_t *ip;
+	uint8_t *ipver;
+
+	data_end = (void *)(long)skb->data_end;
+	cursor = (void *)(long)skb->data;
+	ipver = (uint8_t *)cursor;
+
+	if ((void *)ipver + sizeof(*ipver) > data_end)
+		return NULL;
+
+	if ((*ipver >> 4) != 6)
+		return NULL;
+
+	ip = cursor_advance(cursor, sizeof(*ip));
+	if ((void *)ip + sizeof(*ip) > data_end)
+		return NULL;
+
+	if (ip->next_header != 43)
+		return NULL;
+
+	srh = cursor_advance(cursor, sizeof(*srh));
+	if ((void *)srh + sizeof(*srh) > data_end)
+		return NULL;
+
+	if (srh->type != 4)
+		return NULL;
+
+	return srh;
+}
+
+static __attribute__((always_inline))
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+		   uint32_t old_pad, uint32_t pad_off)
+{
+	int err;
+
+	if (new_pad != old_pad) {
+		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+					  (int) new_pad - (int) old_pad);
+		if (err)
+			return err;
+	}
+
+	if (new_pad > 0) {
+		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0};
+		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+		pad_tlv->type = SR6_TLV_PADDING;
+		pad_tlv->len = new_pad - 2;
+
+		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+					       (void *)pad_tlv_buf, new_pad);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static __attribute__((always_inline))
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+			  uint32_t *tlv_off, uint32_t *pad_size,
+			  uint32_t *pad_off)
+{
+	uint32_t srh_off, cur_off;
+	int offset_valid = 0;
+	int err;
+
+	srh_off = (char *)srh - (char *)(long)skb->data;
+	// cur_off = end of segments, start of possible TLVs
+	cur_off = srh_off + sizeof(*srh) +
+		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+	*pad_off = 0;
+
+	// we can only go as far as ~10 TLVs due to the BPF max stack size
+	#pragma clang loop unroll(disable)
+	for (int i = 0; i < 100; i++) {
+		struct sr6_tlv_t tlv;
+
+		if (cur_off == *tlv_off)
+			offset_valid = 1;
+
+		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+			break;
+
+		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+		if (err)
+			return err;
+
+		if (tlv.type == SR6_TLV_PADDING) {
+			*pad_size = tlv.len + sizeof(tlv);
+			*pad_off = cur_off;
+
+			if (*tlv_off == srh_off) {
+				*tlv_off = cur_off;
+				offset_valid = 1;
+			}
+			break;
+
+		} else if (tlv.type == SR6_TLV_HMAC) {
+			break;
+		}
+
+		cur_off += sizeof(tlv) + tlv.len;
+	} // we reached the padding or HMAC TLVs, or the end of the SRH
+
+	if (*pad_off == 0)
+		*pad_off = cur_off;
+
+	if (*tlv_off == -1)
+		*tlv_off = cur_off;
+	else if (!offset_valid)
+		return -EINVAL;
+
+	return 0;
+}
+
+static __attribute__((always_inline))
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+	uint8_t len_remaining, new_pad;
+	uint32_t pad_off = 0;
+	uint32_t pad_size = 0;
+	uint32_t partial_srh_len;
+	int err;
+
+	if (tlv_off != -1)
+		tlv_off += srh_off;
+
+	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+		return -EINVAL;
+
+	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+	if (err)
+		return err;
+
+	// the following can't be moved inside update_tlv_pad because the
+	// bpf verifier has some issues with it
+	pad_off += sizeof(*itlv) + itlv->len;
+	partial_srh_len = pad_off - srh_off;
+	len_remaining = partial_srh_len % 8;
+	new_pad = 8 - len_remaining;
+
+	if (new_pad == 1) // cannot pad for 1 byte only
+		new_pad = 9;
+	else if (new_pad == 8)
+		new_pad = 0;
+
+	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+// Add an Egress TLV fc00::4, add the flag A,
+// and apply End.X action to fc42::1
+SEC("lwt_seg6local")
+int __add_egr_x(struct __sk_buff *skb)
+{
+	unsigned long long hi = 0xfc42000000000000;
+	unsigned long long lo = 0x1;
+	struct ip6_srh_t *srh = get_srh(skb);
+	uint8_t new_flags = SR6_FLAG_ALERT;
+	struct ip6_addr_t addr;
+	int err, offset;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+		      (struct sr6_tlv_t *)&tlv, 20);
+	if (err)
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+	err = bpf_lwt_seg6_store_bytes(skb, offset,
+				       (void *)&new_flags, sizeof(new_flags));
+	if (err)
+		return BPF_DROP;
+
+	addr.lo = htonll(lo);
+	addr.hi = htonll(hi);
+	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+				  (void *)&addr, sizeof(addr));
+	if (err)
+		return BPF_DROP;
+	return BPF_REDIRECT;
+}
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
new file mode 100644
index 000000000000..608a06871572
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
+#define TCP_MEM_LOOPS 28  /* because 30 doesn't fit into 512 bytes of stack */
+#define MAX_ULONG_STR_LEN 7
+#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+
+static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
+	unsigned char i;
+	char name[64];
+	int ret;
+
+	memset(name, 0, sizeof(name));
+	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < sizeof(tcp_mem_name); ++i)
+		if (name[i] != tcp_mem_name[i])
+			return 0;
+
+	return 1;
+}
+
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+	unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
+	char value[MAX_VALUE_STR_LEN];
+	unsigned char i, off = 0;
+	int ret;
+
+	if (ctx->write)
+		return 0;
+
+	if (!is_tcp_mem(ctx))
+		return 0;
+
+	ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+	if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+		ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+				  tcp_mem + i);
+		if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+			return 0;
+		off += ret & MAX_ULONG_STR_LEN;
+	}
+
+	return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
new file mode 100644
index 000000000000..cb201cbe11e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
+#define TCP_MEM_LOOPS 20  /* because 30 doesn't fit into 512 bytes of stack */
+#define MAX_ULONG_STR_LEN 7
+#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+
+static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
+{
+	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
+	unsigned char i;
+	char name[64];
+	int ret;
+
+	memset(name, 0, sizeof(name));
+	ret = bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
+	if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < sizeof(tcp_mem_name); ++i)
+		if (name[i] != tcp_mem_name[i])
+			return 0;
+
+	return 1;
+}
+
+
+SEC("cgroup/sysctl")
+int sysctl_tcp_mem(struct bpf_sysctl *ctx)
+{
+	unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
+	char value[MAX_VALUE_STR_LEN];
+	unsigned char i, off = 0;
+	int ret;
+
+	if (ctx->write)
+		return 0;
+
+	if (!is_tcp_mem(ctx))
+		return 0;
+
+	ret = bpf_sysctl_get_current_value(ctx, value, MAX_VALUE_STR_LEN);
+	if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
+		return 0;
+
+#pragma clang loop unroll(disable)
+	for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
+		ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
+				  tcp_mem + i);
+		if (ret <= 0 || ret > MAX_ULONG_STR_LEN)
+			return 0;
+		off += ret & MAX_ULONG_STR_LEN;
+	}
+
+	return tcp_mem[0] < tcp_mem[1] && tcp_mem[1] < tcp_mem[2];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
new file mode 100644
index 000000000000..7fa4677df22e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_iptunnel_common.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") rxcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct iptnl_info),
+	.max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(__u32 protocol)
+{
+	__u64 *rxcnt_count;
+
+	rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+	if (rxcnt_count)
+		*rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+				     __u8 protocol)
+{
+	struct tcphdr *th;
+	struct udphdr *uh;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		th = (struct tcphdr *)trans_data;
+		if (th + 1 > data_end)
+			return -1;
+		return th->dest;
+	case IPPROTO_UDP:
+		uh = (struct udphdr *)trans_data;
+		if (uh + 1 > data_end)
+			return -1;
+		return uh->dest;
+	default:
+		return 0;
+	}
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+				       const struct ethhdr *old_eth,
+				       const struct iptnl_info *tnl,
+				       __be16 h_proto)
+{
+	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+	new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct iphdr *iph = data + sizeof(struct ethhdr);
+	__u16 *next_iph;
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+	__u32 csum = 0;
+	int i;
+
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(iph + 1, data_end, iph->protocol);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = iph->protocol;
+	vip.family = AF_INET;
+	vip.daddr.v4 = iph->daddr;
+	vip.dport = dport;
+	payload_len = bpf_ntohs(iph->tot_len);
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v4-in-v4 */
+	if (!tnl || tnl->family != AF_INET)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	iph = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*iph);
+
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end ||
+	    iph + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+	iph->version = 4;
+	iph->ihl = sizeof(*iph) >> 2;
+	iph->frag_off =	0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 0;
+	iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
+	iph->daddr = tnl->daddr.v4;
+	iph->saddr = tnl->saddr.v4;
+	iph->ttl = 8;
+
+	next_iph = (__u16 *)iph;
+#pragma clang loop unroll(disable)
+	for (i = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *next_iph++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = ip6h->nexthdr;
+	vip.family = AF_INET6;
+	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+	vip.dport = dport;
+	payload_len = ip6h->payload_len;
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v6-in-v6 */
+	if (!tnl || tnl->family != AF_INET6)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	ip6h = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*ip6h);
+
+	if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+	    ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+	ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip6h->hop_limit = 8;
+	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == bpf_htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+		return handle_ipv6(xdp);
+	else
+		return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From dcc5e1f9cad0e31198c4a4254981928299b49054 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 19 Jun 2019 09:41:09 +0300
Subject: selftests: tc: add ingress device matching support

Extend tc_flower to test plain ingress device matching and also
tc_shblock to test ingress device matching on shared block.
Add new tc_flower_router.sh where ingress device matching on egress
(after routing) is done.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/forwarding/tc_flower.sh  |  26 +++-
 .../selftests/net/forwarding/tc_flower_router.sh   | 172 +++++++++++++++++++++
 .../selftests/net/forwarding/tc_shblocks.sh        |  29 +++-
 3 files changed, 225 insertions(+), 2 deletions(-)
 create mode 100755 tools/testing/selftests/net/forwarding/tc_flower_router.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 124803eea4a9..058c746ee300 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -3,7 +3,7 @@
 
 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
 	match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \
-	match_ip_tos_test"
+	match_ip_tos_test match_indev_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -310,6 +310,30 @@ match_ip_tos_test()
 	log_test "ip_tos match ($tcflags)"
 }
 
+match_indev_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags indev $h1 dst_mac $h2mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags indev $h2 dst_mac $h2mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "indev match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_router.sh b/tools/testing/selftests/net/forwarding/tc_flower_router.sh
new file mode 100755
index 000000000000..4aee9c9e69f6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_router.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_indev_egress_test"
+NUM_NETIFS=6
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+
+	ip route add 192.0.2.0/24 vrf v$h1 nexthop via 192.0.1.2
+	ip route add 192.0.3.0/24 vrf v$h1 nexthop via 192.0.1.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.3.0/24 vrf v$h1
+	ip route del 192.0.2.0/24 vrf v$h1
+
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.1/24
+
+	ip route add 192.0.1.0/24 vrf v$h2 nexthop via 192.0.2.2
+	ip route add 192.0.3.0/24 vrf v$h2 nexthop via 192.0.2.2
+}
+
+h2_destroy()
+{
+	ip route del 192.0.3.0/24 vrf v$h2
+	ip route del 192.0.1.0/24 vrf v$h2
+
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.3.1/24
+
+	ip route add 192.0.1.0/24 vrf v$h3 nexthop via 192.0.3.2
+	ip route add 192.0.2.0/24 vrf v$h3 nexthop via 192.0.3.2
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/24 vrf v$h3
+	ip route del 192.0.1.0/24 vrf v$h3
+
+	simple_if_fini $h3 192.0.3.1/24
+}
+
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+	ip link set dev $rp3 up
+
+	tc qdisc add dev $rp3 clsact
+
+	ip address add 192.0.1.2/24 dev $rp1
+	ip address add 192.0.2.2/24 dev $rp2
+	ip address add 192.0.3.2/24 dev $rp3
+}
+
+router_destroy()
+{
+	ip address del 192.0.3.2/24 dev $rp3
+	ip address del 192.0.2.2/24 dev $rp2
+	ip address del 192.0.1.2/24 dev $rp1
+
+	tc qdisc del dev $rp3 clsact
+
+	ip link set dev $rp3 down
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+match_indev_egress_test()
+{
+	RET=0
+
+	tc filter add dev $rp3 egress protocol ip pref 1 handle 101 flower \
+		$tcflags indev $rp1 dst_ip 192.0.3.1 action drop
+	tc filter add dev $rp3 egress protocol ip pref 2 handle 102 flower \
+		$tcflags indev $rp2 dst_ip 192.0.3.1 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac -A 192.0.1.1 -B 192.0.3.1 \
+		-t ip -q
+
+	tc_check_packets "dev $rp3 egress" 102 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $rp3 egress" 101 1
+	check_err $? "Did not match on correct filter"
+
+	$MZ $h2 -c 1 -p 64 -a $h2mac -b $rp2mac -A 192.0.2.1 -B 192.0.3.1 \
+		-t ip -q
+
+	tc_check_packets "dev $rp3 egress" 101 2
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $rp3 egress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $rp3 egress protocol ip pref 2 handle 102 flower
+	tc filter del dev $rp3 egress protocol ip pref 1 handle 101 flower
+
+	log_test "indev egress match ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	h2=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	h3=${NETIFS[p5]}
+	rp3=${NETIFS[p6]}
+
+	h1mac=$(mac_get $h1)
+	rp1mac=$(mac_get $rp1)
+	h2mac=$(mac_get $h2)
+	rp2mac=$(mac_get $rp2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index 9826a446e2c0..772e00ac3230 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="shared_block_test"
+ALL_TESTS="shared_block_test match_indev_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -70,6 +70,33 @@ shared_block_test()
 	log_test "shared block ($tcflags)"
 }
 
+match_indev_test()
+{
+	RET=0
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		$tcflags indev $swp1 dst_mac $swmac action drop
+	tc filter add block 22 protocol ip pref 2 handle 102 flower \
+		$tcflags indev $swp2 dst_mac $swmac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 101 1
+	check_err $? "Did not match first incoming packet on a block"
+
+	$MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 102 1
+	check_err $? "Did not match second incoming packet on a block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+	tc filter del block 22 protocol ip pref 2 handle 102 flower
+
+	log_test "indev match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
-- 
cgit v1.2.3-59-g8ed1b


From 22f1a38a547b5ce5e73b0a77339cbf23f70dee5d Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 18 Jun 2019 16:03:04 -0400
Subject: selftests/net: make udpgso_bench skip unsupported testcases

Kselftest can be run against older kernels. Instead of failing hard
when a feature is unsupported, return the KSFT_SKIP exit code.

Specifically, do not fail hard on missing udp zerocopy.

The udp gso bench test runs multiple test cases from a single script.
Fail if any case fails, else return skip if any test is skipped.

Link: https://lore.kernel.org/lkml/20190618171516.GA17547@kroah.com/
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgso_bench.sh   | 75 +++++++++++++++------------
 tools/testing/selftests/net/udpgso_bench_tx.c | 18 ++++++-
 2 files changed, 59 insertions(+), 34 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 4df1cd8d69d2..80b5d352702e 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -3,9 +3,47 @@
 #
 # Run a series of udpgso benchmarks
 
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
+readonly GREEN='\033[0;92m'
+readonly YELLOW='\033[0;33m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m' # No Color
+
+readonly KSFT_PASS=0
+readonly KSFT_FAIL=1
+readonly KSFT_SKIP=4
+
+num_pass=0
+num_err=0
+num_skip=0
+
+kselftest_test_exitcode() {
+	local -r exitcode=$1
+
+	if [[ ${exitcode} -eq ${KSFT_PASS} ]]; then
+		num_pass=$(( $num_pass + 1 ))
+	elif [[ ${exitcode} -eq ${KSFT_SKIP} ]]; then
+		num_skip=$(( $num_skip + 1 ))
+	else
+		num_err=$(( $num_err + 1 ))
+	fi
+}
+
+kselftest_exit() {
+	echo -e "$(basename $0): PASS=${num_pass} SKIP=${num_skip} FAIL=${num_err}"
+
+	if [[ $num_err -ne 0 ]]; then
+		echo -e "$(basename $0): ${RED}FAIL${NC}"
+		exit ${KSFT_FAIL}
+	fi
+
+	if [[ $num_skip -ne 0 ]]; then
+		echo -e "$(basename $0): ${YELLOW}SKIP${NC}"
+		exit ${KSFT_SKIP}
+	fi
+
+	echo -e "$(basename $0): ${GREEN}PASS${NC}"
+	exit ${KSFT_PASS}
+}
 
 wake_children() {
 	local -r jobs="$(jobs -p)"
@@ -29,93 +67,66 @@ run_in_netns() {
 	local -r args=$@
 
 	./in_netns.sh $0 __subprocess ${args}
+	kselftest_test_exitcode $?
 }
 
 run_udp() {
 	local -r args=$@
-	local errors=0
 
 	echo "udp"
 	run_in_netns ${args}
-	errors=$(( $errors + $? ))
 
 	echo "udp gso"
 	run_in_netns ${args} -S 0
-	errors=$(( $errors + $? ))
 
 	echo "udp gso zerocopy"
 	run_in_netns ${args} -S 0 -z
-	errors=$(( $errors + $? ))
 
 	echo "udp gso timestamp"
 	run_in_netns ${args} -S 0 -T
-	errors=$(( $errors + $? ))
 
 	echo "udp gso zerocopy audit"
 	run_in_netns ${args} -S 0 -z -a
-	errors=$(( $errors + $? ))
 
 	echo "udp gso timestamp audit"
 	run_in_netns ${args} -S 0 -T -a
-	errors=$(( $errors + $? ))
 
 	echo "udp gso zerocopy timestamp audit"
 	run_in_netns ${args} -S 0 -T -z -a
-	errors=$(( $errors + $? ))
-
-	return $errors
 }
 
 run_tcp() {
 	local -r args=$@
-	local errors=0
 
 	echo "tcp"
 	run_in_netns ${args} -t
-	errors=$(( $errors + $? ))
 
 	echo "tcp zerocopy"
 	run_in_netns ${args} -t -z
-	errors=$(( $errors + $? ))
 
 	# excluding for now because test fails intermittently
 	# add -P option to include poll() to reduce possibility of lost messages
 	#echo "tcp zerocopy audit"
 	#run_in_netns ${args} -t -z -P -a
-	#errors=$(( $errors + $? ))
-
-	return $errors
 }
 
 run_all() {
 	local -r core_args="-l 3"
 	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
 	local -r ipv6_args="${core_args} -6 -D ::1"
-	local errors=0
 
 	echo "ipv4"
 	run_tcp "${ipv4_args}"
-	errors=$(( $errors + $? ))
 	run_udp "${ipv4_args}"
-	errors=$(( $errors + $? ))
 
 	echo "ipv6"
 	run_tcp "${ipv4_args}"
-	errors=$(( $errors + $? ))
 	run_udp "${ipv6_args}"
-	errors=$(( $errors + $? ))
-
-	return $errors
 }
 
 if [[ $# -eq 0 ]]; then
 	run_all
-	if [ $? -ne 0 ]; then
-		echo -e "$(basename $0): ${RED}FAIL${NC}"
-		exit 1
-	fi
-
-	echo -e "$(basename $0): ${GREEN}PASS${NC}"
+	kselftest_exit
 elif [[ $1 == "__subprocess" ]]; then
 	shift
 	run_one $@
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index dfa83ad57206..ada99496634a 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -25,6 +25,8 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "../kselftest.h"
+
 #ifndef ETH_MAX_MTU
 #define ETH_MAX_MTU 0xFFFFU
 #endif
@@ -45,6 +47,10 @@
 #define MSG_ZEROCOPY	0x4000000
 #endif
 
+#ifndef ENOTSUPP
+#define ENOTSUPP	524
+#endif
+
 #define NUM_PKT		100
 
 static bool	cfg_cache_trash;
@@ -603,7 +609,7 @@ int main(int argc, char **argv)
 {
 	unsigned long num_msgs, num_sends;
 	unsigned long tnow, treport, tstop;
-	int fd, i, val;
+	int fd, i, val, ret;
 
 	parse_opts(argc, argv);
 
@@ -623,8 +629,16 @@ int main(int argc, char **argv)
 
 	if (cfg_zerocopy) {
 		val = 1;
-		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+
+		ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+				 &val, sizeof(val));
+		if (ret) {
+			if (errno == ENOPROTOOPT || errno == ENOTSUPP) {
+				fprintf(stderr, "SO_ZEROCOPY not supported");
+				exit(KSFT_SKIP);
+			}
 			error(1, errno, "setsockopt zerocopy");
+		}
 	}
 
 	if (cfg_connected &&
-- 
cgit v1.2.3-59-g8ed1b


From 58ade67b021549d540e2ed4be6971c5e3240388f Mon Sep 17 00:00:00 2001
From: Shijie Luo <luoshijie1@huawei.com>
Date: Tue, 18 Jun 2019 15:14:05 +0000
Subject: selftests: add route_localnet test script

Add a simple scripts to exercise several situations when enable
route_localnet.

Signed-off-by: Shijie Luo <luoshijie1@huawei.com>
Signed-off-by: Zhiqiang liu <liuzhiqiang26@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/route_localnet.sh | 74 +++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100755 tools/testing/selftests/net/route_localnet.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh
new file mode 100755
index 000000000000..116bfeab72fa
--- /dev/null
+++ b/tools/testing/selftests/net/route_localnet.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a couple of tests when route_localnet = 1.
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+setup() {
+    ip netns add "${PEER_NS}"
+    ip -netns "${PEER_NS}" link set dev lo up
+    ip link add name veth0 type veth peer name veth1
+    ip link set dev veth0 up
+    ip link set dev veth1 netns "${PEER_NS}"
+
+    # Enable route_localnet and delete useless route 127.0.0.0/8.
+    sysctl -w net.ipv4.conf.veth0.route_localnet=1
+    ip netns exec "${PEER_NS}" sysctl -w net.ipv4.conf.veth1.route_localnet=1
+    ip route del 127.0.0.0/8 dev lo table local
+    ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local
+
+    ifconfig veth0 127.25.3.4/24 up
+    ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up
+
+    ip route flush cache
+    ip netns exec "${PEER_NS}" ip route flush cache
+}
+
+cleanup() {
+    ip link del veth0
+    ip route add local 127.0.0.0/8 dev lo proto kernel scope host src 127.0.0.1
+    local -r ns="$(ip netns list|grep $PEER_NS)"
+    [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+
+# Run test when arp_announce = 2.
+run_arp_announce_test() {
+    echo "run arp_announce test"
+    setup
+
+    sysctl -w net.ipv4.conf.veth0.arp_announce=2
+    ip netns exec "${PEER_NS}" sysctl -w net.ipv4.conf.veth1.arp_announce=2
+    ping -c5 -I veth0 127.25.3.14
+    if [ $? -ne 0 ];then
+        echo "failed"
+    else
+        echo "ok"
+    fi
+
+    cleanup
+}
+
+# Run test when arp_ignore = 3.
+run_arp_ignore_test() {
+    echo "run arp_ignore test"
+    setup
+
+    sysctl -w net.ipv4.conf.veth0.arp_ignore=3
+    ip netns exec "${PEER_NS}" sysctl -w net.ipv4.conf.veth1.arp_ignore=3
+    ping -c5 -I veth0 127.25.3.14
+    if [ $? -ne 0 ];then
+        echo "failed"
+    else
+        echo "ok"
+    fi
+
+    cleanup
+}
+
+run_all_tests() {
+    run_arp_announce_test
+    run_arp_ignore_test
+}
+
+run_all_tests
-- 
cgit v1.2.3-59-g8ed1b


From de755a85130ede40a988dbc04e449eb56fb998db Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 21 Jun 2019 17:45:29 +0200
Subject: selftests: pmtu: Introduce list_flush_ipv4_exception test case

This test checks that route exceptions can be successfully listed and
flushed using ip -6 route {list,flush} cache.

v7: No changes

v6:
  - Merge this patch into series including fix, as it's also targeted
    for net-next
  - Drop left-over print of 'ip route list cache | wc -l'

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 60 +++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 269e839b747e..f5004a9df229 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -112,6 +112,10 @@
 # - cleanup_ipv6_exception
 #	Same as above, but use IPv6 transport from A to B
 #
+# - list_flush_ipv4_exception
+#	Using the same topology as in pmtu_ipv4, create exceptions, and check
+#	they are shown when listing exception caches, gone after flushing them
+#
 # - list_flush_ipv6_exception
 #	Using the same topology as in pmtu_ipv6, create exceptions, and check
 #	they are shown when listing exception caches, gone after flushing them
@@ -156,6 +160,7 @@ tests="
 	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
 	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
 	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1
+	list_flush_ipv4_exception	ipv4: list and flush cached exceptions	1
 	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1"
 
 NS_A="ns-A"
@@ -1207,6 +1212,61 @@ run_test_nh() {
 	USE_NH=no
 }
 
+test_list_flush_ipv4_exception() {
+	setup namespaces routing || return 2
+	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
+	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
+	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
+
+	dst_prefix1="${prefix4}.${b_r1}."
+	dst2="${prefix4}.${b_r2}.1"
+
+	# Set up initial MTU values
+	mtu "${ns_a}"  veth_A-R1 2000
+	mtu "${ns_r1}" veth_R1-A 2000
+	mtu "${ns_r1}" veth_R1-B 1500
+	mtu "${ns_b}"  veth_B-R1 1500
+
+	mtu "${ns_a}"  veth_A-R2 2000
+	mtu "${ns_r2}" veth_R2-A 2000
+	mtu "${ns_r2}" veth_R2-B 1500
+	mtu "${ns_b}"  veth_B-R2 1500
+
+	fail=0
+
+	# Add 100 addresses for veth endpoint on B reached by default A route
+	for i in $(seq 100 199); do
+		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
+	done
+
+	# Create 100 cached route exceptions for path via R1, one via R2. Note
+	# that with IPv4 we need to actually cause a route lookup that matches
+	# the exception caused by ICMP, in order to actually have a cached
+	# route, so we need to ping each destination twice
+	for i in $(seq 100 199); do
+		run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
+	done
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"
+
+	# Each exception is printed as two lines
+	if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then
+		err "  can't list cached exceptions"
+		fail=1
+	fi
+
+	run_cmd ${ns_a} ip route flush cache
+	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}1)"
+	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}2)"
+	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
+	   [ -n "$(${ns_a} ip route list cache)" ]; then
+		err "  can't flush cached exceptions"
+		fail=1
+	fi
+
+	return ${fail}
+}
+
 test_list_flush_ipv6_exception() {
 	setup namespaces routing || return 2
 	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
-- 
cgit v1.2.3-59-g8ed1b


From b964641e9925be1cb2c6dbf1d3ecaf6b594c15c0 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Fri, 21 Jun 2019 17:45:30 +0200
Subject: selftests: pmtu: Make list_flush_ipv6_exception test more demanding

Instead of just listing and flushing two cached exceptions, create
a relatively big number of them, and count how many are listed. Single
netlink dump messages contain approximately 25 entries each, and this
way we can make sure the partial dump tracking mechanism is working
properly.

While at it, also ensure that no cached routes can be listed after
flush, and remove 'sleep 1' calls, they are not actually needed.

v7: No changes

v6:
  - Merge this patch into series including fix, as it's also targeted
    for net-next. No actual changes

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f5004a9df229..ab367e75f095 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1274,7 +1274,7 @@ test_list_flush_ipv6_exception() {
 	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
 	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
 
-	dst1="${prefix6}:${b_r1}::1"
+	dst_prefix1="${prefix6}:${b_r1}::"
 	dst2="${prefix6}:${b_r2}::1"
 
 	# Set up initial MTU values
@@ -1290,20 +1290,26 @@ test_list_flush_ipv6_exception() {
 
 	fail=0
 
-	# Create route exceptions
-	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
-	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
+	# Add 100 addresses for veth endpoint on B reached by default A route
+	for i in $(seq 100 199); do
+		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
+	done
 
-	if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 2 ]; then
+	# Create 100 cached route exceptions for path via R1, one via R2
+	for i in $(seq 100 199); do
+		run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
+	done
+	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
+	if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then
 		err "  can't list cached exceptions"
 		fail=1
 	fi
 
 	run_cmd ${ns_a} ip -6 route flush cache
-	sleep 1
-	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
 	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
-	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ]; then
+	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
+	   [ -n "$(${ns_a} ip -6 route list cache)" ]; then
 		err "  can't flush cached exceptions"
 		fail=1
 	fi
-- 
cgit v1.2.3-59-g8ed1b


From 900de4ac4953fcf5e25ea45bea5bb28e797b1420 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 19 Jun 2019 17:27:42 +0100
Subject: libbpf: fix spelling mistake "conflictling" -> "conflicting"

There are several spelling mistakes in pr_warning messages. Fix these.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4259c9f0cfe7..68f45a96769f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1169,7 +1169,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			pr_debug("map '%s': found key_size = %u.\n",
 				 map_name, sz);
 			if (map->def.key_size && map->def.key_size != sz) {
-				pr_warning("map '%s': conflictling key size %u != %u.\n",
+				pr_warning("map '%s': conflicting key size %u != %u.\n",
 					   map_name, map->def.key_size, sz);
 				return -EINVAL;
 			}
@@ -1197,7 +1197,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			pr_debug("map '%s': found key [%u], sz = %lld.\n",
 				 map_name, t->type, sz);
 			if (map->def.key_size && map->def.key_size != sz) {
-				pr_warning("map '%s': conflictling key size %u != %lld.\n",
+				pr_warning("map '%s': conflicting key size %u != %lld.\n",
 					   map_name, map->def.key_size, sz);
 				return -EINVAL;
 			}
@@ -1212,7 +1212,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			pr_debug("map '%s': found value_size = %u.\n",
 				 map_name, sz);
 			if (map->def.value_size && map->def.value_size != sz) {
-				pr_warning("map '%s': conflictling value size %u != %u.\n",
+				pr_warning("map '%s': conflicting value size %u != %u.\n",
 					   map_name, map->def.value_size, sz);
 				return -EINVAL;
 			}
@@ -1240,7 +1240,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			pr_debug("map '%s': found value [%u], sz = %lld.\n",
 				 map_name, t->type, sz);
 			if (map->def.value_size && map->def.value_size != sz) {
-				pr_warning("map '%s': conflictling value size %u != %lld.\n",
+				pr_warning("map '%s': conflicting value size %u != %lld.\n",
 					   map_name, map->def.value_size, sz);
 				return -EINVAL;
 			}
-- 
cgit v1.2.3-59-g8ed1b


From 88091ff56b71d73c5577ebefcd4f0f721a359077 Mon Sep 17 00:00:00 2001
From: Toshiaki Makita <toshiaki.makita1@gmail.com>
Date: Thu, 20 Jun 2019 11:23:23 +0900
Subject: selftests, bpf: Add test for veth native XDP

Add a test case for veth native XDP. It checks if XDP_PASS, XDP_TX and
XDP_REDIRECT work properly.

  $ cd tools/testing/selftests/bpf
  $ make \
  	TEST_CUSTOM_PROGS= \
  	TEST_GEN_PROGS= \
  	TEST_GEN_PROGS_EXTENDED= \
  	TEST_PROGS_EXTENDED= \
  	TEST_PROGS="test_xdp_veth.sh" \
  	run_tests
  TAP version 13
  1..1
  # selftests: bpf: test_xdp_veth.sh
  # PING 10.1.1.33 (10.1.1.33) 56(84) bytes of data.
  # 64 bytes from 10.1.1.33: icmp_seq=1 ttl=64 time=0.073 ms
  #
  # --- 10.1.1.33 ping statistics ---
  # 1 packets transmitted, 1 received, 0% packet loss, time 0ms
  # rtt min/avg/max/mdev = 0.073/0.073/0.073/0.000 ms
  # selftests: xdp_veth [PASS]
  ok 1 selftests: bpf: test_xdp_veth.sh

Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile               |   1 +
 .../testing/selftests/bpf/progs/xdp_redirect_map.c |  31 ++++++
 tools/testing/selftests/bpf/progs/xdp_tx.c         |  12 +++
 tools/testing/selftests/bpf/test_xdp_veth.sh       | 118 +++++++++++++++++++++
 4 files changed, 162 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_redirect_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/xdp_tx.c
 create mode 100755 tools/testing/selftests/bpf/test_xdp_veth.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fb5ce43e28b3..f2dbe2043067 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -46,6 +46,7 @@ TEST_PROGS := test_kmod.sh \
 	test_libbpf.sh \
 	test_xdp_redirect.sh \
 	test_xdp_meta.sh \
+	test_xdp_veth.sh \
 	test_offload.py \
 	test_sock_addr.sh \
 	test_tunnel.sh \
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
new file mode 100644
index 000000000000..e87a985b9df9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") tx_port = {
+	.type = BPF_MAP_TYPE_DEVMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 8,
+};
+
+SEC("redirect_map_0")
+int xdp_redirect_map_0(struct xdp_md *xdp)
+{
+	return bpf_redirect_map(&tx_port, 0, 0);
+}
+
+SEC("redirect_map_1")
+int xdp_redirect_map_1(struct xdp_md *xdp)
+{
+	return bpf_redirect_map(&tx_port, 1, 0);
+}
+
+SEC("redirect_map_2")
+int xdp_redirect_map_2(struct xdp_md *xdp)
+{
+	return bpf_redirect_map(&tx_port, 2, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
new file mode 100644
index 000000000000..57912e7c94b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+SEC("tx")
+int xdp_tx(struct xdp_md *xdp)
+{
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
new file mode 100755
index 000000000000..ba8ffcdaac30
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Create 3 namespaces with 3 veth peers, and
+# forward packets in-between using native XDP
+#
+#                      XDP_TX
+# NS1(veth11)        NS2(veth22)        NS3(veth33)
+#      |                  |                  |
+#      |                  |                  |
+#   (veth1,            (veth2,            (veth3,
+#   id:111)            id:122)            id:133)
+#     ^ |                ^ |                ^ |
+#     | |  XDP_REDIRECT  | |  XDP_REDIRECT  | |
+#     | ------------------ ------------------ |
+#     -----------------------------------------
+#                    XDP_REDIRECT
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME=xdp_veth
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+	set +e
+	ip link del veth1 2> /dev/null
+	ip link del veth2 2> /dev/null
+	ip link del veth3 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+	ip netns del ns3 2> /dev/null
+	rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+	echo "selftests: $TESTNAME [SKIP]"
+	_cleanup
+
+	exit $ksft_skip
+}
+
+cleanup()
+{
+	if [ "$?" = 0 ]; then
+		echo "selftests: $TESTNAME [PASS]"
+	else
+		echo "selftests: $TESTNAME [FAILED]"
+	fi
+	_cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+	echo "selftests: $TESTNAME [SKIP] Need root privileges"
+	exit $ksft_skip
+fi
+
+if ! ip link set dev lo xdp off > /dev/null 2>&1; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without the ip xdp support"
+	exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+	exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+	exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+
+ip link add veth1 index 111 type veth peer name veth11 netns ns1
+ip link add veth2 index 122 type veth peer name veth22 netns ns2
+ip link add veth3 index 133 type veth peer name veth33 netns ns3
+
+ip link set veth1 up
+ip link set veth2 up
+ip link set veth3 up
+
+ip -n ns1 addr add 10.1.1.11/24 dev veth11
+ip -n ns3 addr add 10.1.1.33/24 dev veth33
+
+ip -n ns1 link set dev veth11 up
+ip -n ns2 link set dev veth22 up
+ip -n ns3 link set dev veth33 up
+
+mkdir $BPF_DIR
+bpftool prog loadall \
+	xdp_redirect_map.o $BPF_DIR/progs type xdp \
+	pinmaps $BPF_DIR/maps
+bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0
+bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0
+bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0
+ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
+ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
+ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
+
+ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
+
+trap cleanup EXIT
+
+ip netns exec ns1 ping -c 1 -W 1 10.1.1.33
+
+exit 0
-- 
cgit v1.2.3-59-g8ed1b


From 489ce2f42514b4324e5c63e93ae7570cab995254 Mon Sep 17 00:00:00 2001
From: Lucas Bates <lucasb@mojatatu.com>
Date: Mon, 24 Jun 2019 21:00:27 -0400
Subject: tc-testing: Restore original behaviour for namespaces in tdc

This patch restores the original behaviour for tdc prior to the
introduction of the plugin system, where the network namespace
functionality was split from the main script.

It introduces the concept of required plugins for testcases,
and will automatically load any plugin that isn't already
enabled when said plugin is required by even one testcase.

Additionally, the -n option for the nsPlugin is deprecated
so the default action is to make use of the namespaces.
Instead, we introduce -N to not use them, but still create
the veth pair.

buildebpfPlugin's -B option is also deprecated.

If a test cases requires the features of a specific plugin
in order to pass, it should instead include a new key/value
pair describing plugin interactions:

        "plugins": {
                "requires": "buildebpfPlugin"
        },

A test case can have more than one required plugin: a list
can be inserted as the value for 'requires'.

Signed-off-by: Lucas Bates <lucasb@mojatatu.com>
Acked-by: Davide Caratti <dcaratti@redhat.com>
Tested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/README          |  22 ++-
 .../tc-testing/plugin-lib/buildebpfPlugin.py       |   5 +-
 .../selftests/tc-testing/plugin-lib/nsPlugin.py    |  26 +++-
 .../selftests/tc-testing/tc-tests/actions/bpf.json |   6 +
 .../selftests/tc-testing/tc-tests/filters/fw.json  | 162 +++++++++++++++++++++
 .../tc-testing/tc-tests/filters/tests.json         |  12 ++
 tools/testing/selftests/tc-testing/tdc.py          |  78 +++++++++-
 tools/testing/selftests/tc-testing/tdc_helper.py   |   5 +-
 8 files changed, 296 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index f9281e8aa313..22e5da9008fd 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -12,10 +12,10 @@ REQUIREMENTS
 *  Minimum Python version of 3.4. Earlier 3.X versions may work but are not
    guaranteed.
 
-*  The kernel must have network namespace support
+*  The kernel must have network namespace support if using nsPlugin
 
 *  The kernel must have veth support available, as a veth pair is created
-   prior to running the tests.
+   prior to running the tests when using nsPlugin.
 
 *  The kernel must have the appropriate infrastructure enabled to run all tdc
    unit tests. See the config file in this directory for minimum required
@@ -53,8 +53,12 @@ commands being tested must be run as root.  The code that enforces
 execution by root uid has been moved into a plugin (see PLUGIN
 ARCHITECTURE, below).
 
-If nsPlugin is linked, all tests are executed inside a network
-namespace to prevent conflicts within the host.
+Tests that use a network device should have nsPlugin.py listed as a
+requirement for that test. nsPlugin executes all commands within a
+network namespace and creates a veth pair which may be used in those test
+cases. To disable execution within the namespace, pass the -N option
+to tdc when starting a test run; the veth pair will still be created
+by the plugin.
 
 Running tdc without any arguments will run all tests. Refer to the section
 on command line arguments for more information, or run:
@@ -154,8 +158,8 @@ action:
 netns:
   options for nsPlugin (run commands in net namespace)
 
-  -n, --namespace
-                        Run commands in namespace as specified in tdc_config.py
+  -N, --no-namespace
+                        Do not run commands in a network namespace.
 
 valgrind:
   options for valgrindPlugin (run command under test under Valgrind)
@@ -171,7 +175,8 @@ was in the tdc.py script has been moved into the plugins.
 
 The plugins are in the directory plugin-lib.  The are executed from
 directory plugins.  Put symbolic links from plugins to plugin-lib,
-and name them according to the order you want them to run.
+and name them according to the order you want them to run. This is not
+necessary if a test case being run requires a specific plugin to work.
 
 Example:
 
@@ -223,7 +228,8 @@ directory:
   - rootPlugin.py:
       implements the enforcement of running as root
   - nsPlugin.py:
-      sets up a network namespace and runs all commands in that namespace
+      sets up a network namespace and runs all commands in that namespace,
+      while also setting up dummy devices to be used in testing.
   - valgrindPlugin.py
       runs each command in the execute stage under valgrind,
       and checks for leaks.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
index 9f0ba10c44b4..e98c36750fae 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
@@ -34,8 +34,9 @@ class SubPlugin(TdcPlugin):
             'buildebpf',
             'options for buildebpfPlugin')
         self.argparser_group.add_argument(
-            '-B', '--buildebpf', action='store_true',
-            help='build eBPF programs')
+            '--nobuildebpf', action='store_false', default=True,
+            dest='buildebpf',
+            help='Don\'t build eBPF programs')
 
         return self.argparser
 
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index a194b1af2b30..affa7f2d9670 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -18,6 +18,8 @@ class SubPlugin(TdcPlugin):
 
         if self.args.namespace:
             self._ns_create()
+        else:
+            self._ports_create()
 
     def post_suite(self, index):
         '''run commands after test_runner goes into a test loop'''
@@ -27,6 +29,8 @@ class SubPlugin(TdcPlugin):
 
         if self.args.namespace:
             self._ns_destroy()
+        else:
+            self._ports_destroy()
 
     def add_args(self, parser):
         super().add_args(parser)
@@ -34,8 +38,8 @@ class SubPlugin(TdcPlugin):
             'netns',
             'options for nsPlugin(run commands in net namespace)')
         self.argparser_group.add_argument(
-            '-n', '--namespace', action='store_true',
-            help='Run commands in namespace')
+            '-N', '--no-namespace', action='store_false', default=True,
+            dest='namespace', help='Don\'t run commands in namespace')
         return self.argparser
 
     def adjust_command(self, stage, command):
@@ -73,20 +77,30 @@ class SubPlugin(TdcPlugin):
             print('adjust_command:  return command [{}]'.format(command))
         return command
 
+    def _ports_create(self):
+        cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+        self._exec_cmd('pre', cmd)
+        cmd = 'ip link set $DEV0 up'
+        self._exec_cmd('pre', cmd)
+        if not self.args.namespace:
+            cmd = 'ip link set $DEV1 up'
+            self._exec_cmd('pre', cmd)
+
+    def _ports_destroy(self):
+        cmd = 'ip link del $DEV0'
+        self._exec_cmd('post', cmd)
+
     def _ns_create(self):
         '''
         Create the network namespace in which the tests will be run and set up
         the required network devices for it.
         '''
+        self._ports_create()
         if self.args.namespace:
             cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
             self._exec_cmd('pre', cmd)
-            cmd = 'ip link add $DEV0 type veth peer name $DEV1'
-            self._exec_cmd('pre', cmd)
             cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
             self._exec_cmd('pre', cmd)
-            cmd = 'ip link set $DEV0 up'
-            self._exec_cmd('pre', cmd)
             cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
             self._exec_cmd('pre', cmd)
             if self.args.device:
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index b074ea9b6fe8..47a3082b6661 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -54,6 +54,9 @@
             "actions",
             "bpf"
         ],
+        "plugins": {
+                "requires": "buildebpfPlugin"
+        },
         "setup": [
             [
                 "$TC action flush action bpf",
@@ -78,6 +81,9 @@
             "actions",
             "bpf"
         ],
+        "plugins": {
+                "requires": "buildebpfPlugin"
+        },
         "setup": [
             [
                 "$TC action flush action bpf",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
index 6944b90d4897..5272049566d6 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -6,6 +6,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -25,6 +28,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -44,6 +50,114 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -872,6 +986,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: protocol 802_3 prio 3 handle 7 fw action ok"
@@ -892,6 +1009,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: prio 6 handle 2 fw action continue index 5"
@@ -912,6 +1032,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -931,6 +1054,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -950,6 +1076,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
@@ -972,6 +1101,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
@@ -994,6 +1126,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
@@ -1015,6 +1150,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 4 fw action ok",
@@ -1036,6 +1174,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 chain 13 fw action pipe",
@@ -1057,6 +1198,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action drop"
@@ -1077,6 +1221,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 3 prio 4 fw action continue"
@@ -1097,6 +1244,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw action pipe"
@@ -1117,6 +1267,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 fw action pipe flowid 45"
@@ -1137,6 +1290,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
@@ -1157,6 +1313,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
@@ -1177,6 +1336,9 @@
             "filter",
             "fw"
         ],
+	"plugins": {
+		"requires": "nsPlugin"
+	},
         "setup": [
             "$TC qdisc add dev $DEV1 ingress",
             "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 3"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index e2f92cefb8d5..8135778040c2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -6,6 +6,9 @@
             "filter",
             "u32"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
@@ -25,6 +28,9 @@
             "filter",
             "matchall"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV1 clsact",
             "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok"
@@ -45,6 +51,9 @@
             "filter",
             "flower"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV2 ingress",
             "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000"
@@ -66,6 +75,9 @@
             "filter",
             "flower"
         ],
+        "plugins": {
+                "requires": "nsPlugin"
+        },
         "setup": [
             "$TC qdisc add dev $DEV2 ingress",
             "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 5cee15659e5f..678182a2676d 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -25,6 +25,9 @@ from tdc_helper import *
 import TdcPlugin
 from TdcResults import *
 
+class PluginDependencyException(Exception):
+    def __init__(self, missing_pg):
+        self.missing_pg = missing_pg
 
 class PluginMgrTestFail(Exception):
     def __init__(self, stage, output, message):
@@ -37,7 +40,7 @@ class PluginMgr:
         super().__init__()
         self.plugins = {}
         self.plugin_instances = []
-        self.args = []
+        self.failed_plugins = {}
         self.argparser = argparser
 
         # TODO, put plugins in order
@@ -53,6 +56,64 @@ class PluginMgr:
                     self.plugins[mn] = foo
                     self.plugin_instances.append(foo.SubPlugin())
 
+    def load_plugin(self, pgdir, pgname):
+        pgname = pgname[0:-3]
+        foo = importlib.import_module('{}.{}'.format(pgdir, pgname))
+        self.plugins[pgname] = foo
+        self.plugin_instances.append(foo.SubPlugin())
+        self.plugin_instances[-1].check_args(self.args, None)
+
+    def get_required_plugins(self, testlist):
+        '''
+        Get all required plugins from the list of test cases and return
+        all unique items.
+        '''
+        reqs = []
+        for t in testlist:
+            try:
+                if 'requires' in t['plugins']:
+                    if isinstance(t['plugins']['requires'], list):
+                        reqs.extend(t['plugins']['requires'])
+                    else:
+                        reqs.append(t['plugins']['requires'])
+            except KeyError:
+                continue
+        reqs = get_unique_item(reqs)
+        return reqs
+
+    def load_required_plugins(self, reqs, parser, args, remaining):
+        '''
+        Get all required plugins from the list of test cases and load any plugin
+        that is not already enabled.
+        '''
+        pgd = ['plugin-lib', 'plugin-lib-custom']
+        pnf = []
+
+        for r in reqs:
+            if r not in self.plugins:
+                fname = '{}.py'.format(r)
+                source_path = []
+                for d in pgd:
+                    pgpath = '{}/{}'.format(d, fname)
+                    if os.path.isfile(pgpath):
+                        source_path.append(pgpath)
+                if len(source_path) == 0:
+                    print('ERROR: unable to find required plugin {}'.format(r))
+                    pnf.append(fname)
+                    continue
+                elif len(source_path) > 1:
+                    print('WARNING: multiple copies of plugin {} found, using version found')
+                    print('at {}'.format(source_path[0]))
+                pgdir = source_path[0]
+                pgdir = pgdir.split('/')[0]
+                self.load_plugin(pgdir, fname)
+        if len(pnf) > 0:
+            raise PluginDependencyException(pnf)
+
+        parser = self.call_add_args(parser)
+        (args, remaining) = parser.parse_known_args(args=remaining, namespace=args)
+        return args
+
     def call_pre_suite(self, testcount, testidlist):
         for pgn_inst in self.plugin_instances:
             pgn_inst.pre_suite(testcount, testidlist)
@@ -98,6 +159,9 @@ class PluginMgr:
             command = pgn_inst.adjust_command(stage, command)
         return command
 
+    def set_args(self, args):
+        self.args = args
+
     @staticmethod
     def _make_argparser(args):
         self.argparser = argparse.ArgumentParser(
@@ -550,6 +614,7 @@ def filter_tests_by_category(args, testlist):
 
     return answer
 
+
 def get_test_cases(args):
     """
     If a test case file is specified, retrieve tests from that file.
@@ -611,7 +676,7 @@ def get_test_cases(args):
     return allcatlist, allidlist, testcases_by_cats, alltestcases
 
 
-def set_operation_mode(pm, args):
+def set_operation_mode(pm, parser, args, remaining):
     """
     Load the test case data and process remaining arguments to determine
     what the script should do for this run, and call the appropriate
@@ -649,6 +714,12 @@ def set_operation_mode(pm, args):
             exit(0)
 
     if len(alltests):
+        req_plugins = pm.get_required_plugins(alltests)
+        try:
+            args = pm.load_required_plugins(req_plugins, parser, args, remaining)
+        except PluginDependencyException as pde:
+            print('The following plugins were not found:')
+            print('{}'.format(pde.missing_pg))
         catresults = test_runner(pm, args, alltests)
         if args.format == 'none':
             print('Test results output suppression requested\n')
@@ -686,11 +757,12 @@ def main():
     parser = pm.call_add_args(parser)
     (args, remaining) = parser.parse_known_args()
     args.NAMES = NAMES
+    pm.set_args(args)
     check_default_settings(args, remaining, pm)
     if args.verbose > 2:
         print('args is {}'.format(args))
 
-    set_operation_mode(pm, args)
+    set_operation_mode(pm, parser, args, remaining)
 
     exit(0)
 
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
index 9f35c96c88a0..0440d252c4c5 100644
--- a/tools/testing/selftests/tc-testing/tdc_helper.py
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -17,7 +17,10 @@ def get_categorized_testlist(alltests, ucat):
 
 def get_unique_item(lst):
     """ For a list, return a list of the unique items in the list. """
-    return list(set(lst))
+    if len(lst) > 1:
+        return list(set(lst))
+    else:
+        return lst
 
 
 def get_test_categories(alltests):
-- 
cgit v1.2.3-59-g8ed1b


From 2f8776f0c5d3a4359810b68402ea707421009362 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Tue, 25 Jun 2019 14:18:52 -0400
Subject: tc-testing: add ingress qdisc tests

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/qdiscs/ingress.json        | 102 +++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
new file mode 100644
index 000000000000..f518c55f468b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
@@ -0,0 +1,102 @@
+[
+    {
+        "id": "9872",
+        "name": "Add ingress qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "5c5e",
+        "name": "Add ingress qdisc with unsupported argument",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress foorbar",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "74f6",
+        "name": "Add duplicate ingress qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "f769",
+        "name": "Delete nonexistent ingress qdisc",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "3b88",
+        "name": "Delete ingress qdisc twice",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC qdisc del dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3-59-g8ed1b


From 950649791b8366a4b89f570053d37bba1eb21c02 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Wed, 26 Jun 2019 13:38:37 +0300
Subject: libbpf: fix max() type mismatch for 32bit

It fixes build error for 32bit caused by type mismatch
size_t/unsigned long.

Fixes: bf82927125dd ("libbpf: refactor map initialization")
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 68f45a96769f..5186b7710430 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -778,7 +778,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 	if (obj->nr_maps < obj->maps_cap)
 		return &obj->maps[obj->nr_maps++];
 
-	new_cap = max(4ul, obj->maps_cap * 3 / 2);
+	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
 	new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
 	if (!new_maps) {
 		pr_warning("alloc maps for object failed\n");
-- 
cgit v1.2.3-59-g8ed1b


From 6bcc617f842cdb5f3d66b3a3f06614a62fab992b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 25 Jun 2019 15:56:28 -0700
Subject: selftests/bpf: build tests with debug info

Non-BPF (user land) part of selftests is built without debug info making
occasional debugging with gdb terrible. Build with debug info always.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f2dbe2043067..faeb8220daa2 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -15,7 +15,7 @@ LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 LLVM_READELF	?= llvm-readelf
 BTF_PAHOLE	?= pahole
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
+CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \
 	  -Dbpf_prog_load=bpf_prog_test_load \
 	  -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lrt -lpthread
-- 
cgit v1.2.3-59-g8ed1b


From 6c6874f401e5a0caab3b6a0663169e1fb5e930bb Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 25 Jun 2019 09:56:31 -0700
Subject: tools: bpftool: use correct argument in cgroup errors

cgroup code tries to use argv[0] as the cgroup path,
but if it fails uses argv[1] to report errors.

Fixes: 5ccda64d38cc ("bpftool: implement cgroup bpf operations")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/cgroup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 73ec8ea33fb4..a13fb7265d1a 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -168,7 +168,7 @@ static int do_show(int argc, char **argv)
 
 	cgroup_fd = open(argv[0], O_RDONLY);
 	if (cgroup_fd < 0) {
-		p_err("can't open cgroup %s", argv[1]);
+		p_err("can't open cgroup %s", argv[0]);
 		goto exit;
 	}
 
@@ -356,7 +356,7 @@ static int do_attach(int argc, char **argv)
 
 	cgroup_fd = open(argv[0], O_RDONLY);
 	if (cgroup_fd < 0) {
-		p_err("can't open cgroup %s", argv[1]);
+		p_err("can't open cgroup %s", argv[0]);
 		goto exit;
 	}
 
@@ -414,7 +414,7 @@ static int do_detach(int argc, char **argv)
 
 	cgroup_fd = open(argv[0], O_RDONLY);
 	if (cgroup_fd < 0) {
-		p_err("can't open cgroup %s", argv[1]);
+		p_err("can't open cgroup %s", argv[0]);
 		goto exit;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From bb2bd090854cc608d4d8a4d14955eb13dffa94ab Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 27 Jun 2019 14:03:33 +0200
Subject: selftests: rtnetlink: add small test case with 'promote_secondaries'
 enabled

This exercises the 'promote_secondaries' code path.

Without previous fix, this triggers infinite loop/soft lockup:
ifconfig process spinning at 100%, never to return.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index ed606a2e3865..505628884783 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -269,6 +269,25 @@ kci_test_addrlft()
 	echo "PASS: preferred_lft addresses have expired"
 }
 
+kci_test_promote_secondaries()
+{
+	promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
+
+	sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
+
+	for i in $(seq 2 254);do
+		IP="10.23.11.$i"
+		ip -f inet addr add $IP/16 brd + dev "$devdummy"
+		ifconfig "$devdummy" $IP netmask 255.255.0.0
+	done
+
+	ip addr flush dev "$devdummy"
+
+	[ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0
+
+	echo "PASS: promote_secondaries complete"
+}
+
 kci_test_addrlabel()
 {
 	ret=0
@@ -1161,6 +1180,7 @@ kci_test_rtnl()
 	kci_test_polrouting
 	kci_test_route_get
 	kci_test_addrlft
+	kci_test_promote_secondaries
 	kci_test_tc
 	kci_test_gre
 	kci_test_gretap
-- 
cgit v1.2.3-59-g8ed1b


From 2640d3c8123223e0a205b2a25a446df6f072b3ea Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Wed, 26 Jun 2019 17:35:25 +0300
Subject: xsk: Add getsockopt XDP_OPTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make it possible for the application to determine whether the AF_XDP
socket is running in zero-copy mode. To achieve this, add a new
getsockopt option XDP_OPTIONS that returns flags. The only flag
supported for now is the zero-copy mode indicator.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/if_xdp.h       |  8 ++++++++
 net/xdp/xsk.c                     | 20 ++++++++++++++++++++
 tools/include/uapi/linux/if_xdp.h |  8 ++++++++
 3 files changed, 36 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index caed8b1614ff..faaa5ca2a117 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -46,6 +46,7 @@ struct xdp_mmap_offsets {
 #define XDP_UMEM_FILL_RING		5
 #define XDP_UMEM_COMPLETION_RING	6
 #define XDP_STATISTICS			7
+#define XDP_OPTIONS			8
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
@@ -60,6 +61,13 @@ struct xdp_statistics {
 	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
 };
 
+struct xdp_options {
+	__u32 flags;
+};
+
+/* Flags for the flags field of struct xdp_options */
+#define XDP_OPTIONS_ZEROCOPY (1 << 0)
+
 /* Pgoff for mmaping the rings */
 #define XDP_PGOFF_RX_RING			  0
 #define XDP_PGOFF_TX_RING		 0x80000000
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b68a380f50b3..35ca531ac74e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -650,6 +650,26 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
 
 		return 0;
 	}
+	case XDP_OPTIONS:
+	{
+		struct xdp_options opts = {};
+
+		if (len < sizeof(opts))
+			return -EINVAL;
+
+		mutex_lock(&xs->mutex);
+		if (xs->zc)
+			opts.flags |= XDP_OPTIONS_ZEROCOPY;
+		mutex_unlock(&xs->mutex);
+
+		len = sizeof(opts);
+		if (copy_to_user(optval, &opts, len))
+			return -EFAULT;
+		if (put_user(len, optlen))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		break;
 	}
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index caed8b1614ff..faaa5ca2a117 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -46,6 +46,7 @@ struct xdp_mmap_offsets {
 #define XDP_UMEM_FILL_RING		5
 #define XDP_UMEM_COMPLETION_RING	6
 #define XDP_STATISTICS			7
+#define XDP_OPTIONS			8
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
@@ -60,6 +61,13 @@ struct xdp_statistics {
 	__u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
 };
 
+struct xdp_options {
+	__u32 flags;
+};
+
+/* Flags for the flags field of struct xdp_options */
+#define XDP_OPTIONS_ZEROCOPY (1 << 0)
+
 /* Pgoff for mmaping the rings */
 #define XDP_PGOFF_RX_RING			  0
 #define XDP_PGOFF_TX_RING		 0x80000000
-- 
cgit v1.2.3-59-g8ed1b


From 2761ed4b6e192820760d5ba913834b2ba05fd08c Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Wed, 26 Jun 2019 17:35:26 +0300
Subject: libbpf: Support getsockopt XDP_OPTIONS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Query XDP_OPTIONS in libbpf to determine if the zero-copy mode is active
or not.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/xsk.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 7ef6293b4fd7..bf15a80a37c2 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -65,6 +65,7 @@ struct xsk_socket {
 	int xsks_map_fd;
 	__u32 queue_id;
 	char ifname[IFNAMSIZ];
+	bool zc;
 };
 
 struct xsk_nl_info {
@@ -480,6 +481,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	void *rx_map = NULL, *tx_map = NULL;
 	struct sockaddr_xdp sxdp = {};
 	struct xdp_mmap_offsets off;
+	struct xdp_options opts;
 	struct xsk_socket *xsk;
 	socklen_t optlen;
 	int err;
@@ -597,6 +599,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
 	}
 
 	xsk->prog_fd = -1;
+
+	optlen = sizeof(opts);
+	err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen);
+	if (err) {
+		err = -errno;
+		goto out_mmap_tx;
+	}
+
+	xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY;
+
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
 		err = xsk_setup_xdp_prog(xsk);
 		if (err)
-- 
cgit v1.2.3-59-g8ed1b


From 123e8da1d33042a83cedb530fb5efd64f32ce594 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Wed, 26 Jun 2019 17:35:27 +0300
Subject: xsk: Change the default frame size to 4096 and allow controlling it

The typical XDP memory scheme is one packet per page. Change the AF_XDP
frame size in libbpf to 4096, which is the page size on x86, to allow
libbpf to be used with the drivers with the packet-per-page scheme.

Add a command line option -f to xdpsock to allow to specify a custom
frame size.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/xdpsock_user.c | 44 ++++++++++++++++++++++++++++----------------
 tools/lib/bpf/xsk.h        |  2 +-
 2 files changed, 29 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 0f5eb0d7f2df..93eaaf7239b2 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -68,6 +68,7 @@ static int opt_queue;
 static int opt_poll;
 static int opt_interval = 1;
 static u32 opt_xdp_bind_flags;
+static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 static __u32 prog_id;
 
 struct xsk_umem_info {
@@ -276,6 +277,12 @@ static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
 static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
 {
 	struct xsk_umem_info *umem;
+	struct xsk_umem_config cfg = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = opt_xsk_frame_size,
+		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+	};
 	int ret;
 
 	umem = calloc(1, sizeof(*umem));
@@ -283,7 +290,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
 		exit_with_error(errno);
 
 	ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
-			       NULL);
+			       &cfg);
 	if (ret)
 		exit_with_error(-ret);
 
@@ -323,11 +330,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem)
 				     &idx);
 	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
 		exit_with_error(-ret);
-	for (i = 0;
-	     i < XSK_RING_PROD__DEFAULT_NUM_DESCS *
-		     XSK_UMEM__DEFAULT_FRAME_SIZE;
-	     i += XSK_UMEM__DEFAULT_FRAME_SIZE)
-		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i;
+	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
+		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) =
+			i * opt_xsk_frame_size;
 	xsk_ring_prod__submit(&xsk->umem->fq,
 			      XSK_RING_PROD__DEFAULT_NUM_DESCS);
 
@@ -346,6 +351,7 @@ static struct option long_options[] = {
 	{"interval", required_argument, 0, 'n'},
 	{"zero-copy", no_argument, 0, 'z'},
 	{"copy", no_argument, 0, 'c'},
+	{"frame-size", required_argument, 0, 'f'},
 	{0, 0, 0, 0}
 };
 
@@ -365,8 +371,9 @@ static void usage(const char *prog)
 		"  -n, --interval=n	Specify statistics update interval (default 1 sec).\n"
 		"  -z, --zero-copy      Force zero-copy mode.\n"
 		"  -c, --copy           Force copy mode.\n"
+		"  -f, --frame-size=n   Set the frame size (must be a power of two, default is %d).\n"
 		"\n";
-	fprintf(stderr, str, prog);
+	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
 	exit(EXIT_FAILURE);
 }
 
@@ -377,7 +384,7 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options,
+		c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options,
 				&option_index);
 		if (c == -1)
 			break;
@@ -420,6 +427,9 @@ static void parse_command_line(int argc, char **argv)
 		case 'F':
 			opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
 			break;
+		case 'f':
+			opt_xsk_frame_size = atoi(optarg);
+			break;
 		default:
 			usage(basename(argv[0]));
 		}
@@ -432,6 +442,11 @@ static void parse_command_line(int argc, char **argv)
 		usage(basename(argv[0]));
 	}
 
+	if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) {
+		fprintf(stderr, "--frame-size=%d is not a power of two\n",
+			opt_xsk_frame_size);
+		usage(basename(argv[0]));
+	}
 }
 
 static void kick_tx(struct xsk_socket_info *xsk)
@@ -583,8 +598,7 @@ static void tx_only(struct xsk_socket_info *xsk)
 
 			for (i = 0; i < BATCH_SIZE; i++) {
 				xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr
-					= (frame_nb + i) <<
-					XSK_UMEM__DEFAULT_FRAME_SHIFT;
+					= (frame_nb + i) * opt_xsk_frame_size;
 				xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
 					sizeof(pkt_data) - 1;
 			}
@@ -661,21 +675,19 @@ int main(int argc, char **argv)
 	}
 
 	ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
-			     NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
+			     NUM_FRAMES * opt_xsk_frame_size);
 	if (ret)
 		exit_with_error(ret);
 
        /* Create sockets... */
-	umem = xsk_configure_umem(bufs,
-				  NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE);
+	umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
 	xsks[num_socks++] = xsk_configure_socket(umem);
 
 	if (opt_bench == BENCH_TXONLY) {
 		int i;
 
-		for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE;
-		     i += XSK_UMEM__DEFAULT_FRAME_SIZE)
-			(void)gen_eth_frame(umem, i);
+		for (i = 0; i < NUM_FRAMES; i++)
+			(void)gen_eth_frame(umem, i * opt_xsk_frame_size);
 	}
 
 	signal(SIGINT, int_exit);
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index 82ea71a0f3ec..833a6e60d065 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -167,7 +167,7 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
 
 #define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
 #define XSK_RING_PROD__DEFAULT_NUM_DESCS      2048
-#define XSK_UMEM__DEFAULT_FRAME_SHIFT    11 /* 2048 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT    12 /* 4096 bytes */
 #define XSK_UMEM__DEFAULT_FRAME_SIZE     (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
 #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
 
-- 
cgit v1.2.3-59-g8ed1b


From aa6ab6471e525ce6842e95a565f10b56fd6b4d5c Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:48 -0700
Subject: bpf: sync bpf.h to tools/

Export new prog type and hook points to the libbpf.

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b077507efa3f..a396b516a2b2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -170,6 +170,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
 	BPF_PROG_TYPE_CGROUP_SYSCTL,
 	BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+	BPF_PROG_TYPE_CGROUP_SOCKOPT,
 };
 
 enum bpf_attach_type {
@@ -194,6 +195,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_SYSCTL,
 	BPF_CGROUP_UDP4_RECVMSG,
 	BPF_CGROUP_UDP6_RECVMSG,
+	BPF_CGROUP_GETSOCKOPT,
+	BPF_CGROUP_SETSOCKOPT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -3541,4 +3544,15 @@ struct bpf_sysctl {
 				 */
 };
 
+struct bpf_sockopt {
+	__bpf_md_ptr(struct bpf_sock *, sk);
+	__bpf_md_ptr(void *, optval);
+	__bpf_md_ptr(void *, optval_end);
+
+	__s32	level;
+	__s32	optname;
+	__s32	optlen;
+	__s32	retval;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From 4cdbfb59c44a0df58ab321e4ddd9710cd0823584 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:49 -0700
Subject: libbpf: support sockopt hooks

Make libbpf aware of new sockopt hooks so it can derive prog type
and hook point from the section names.

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c        | 5 +++++
 tools/lib/bpf/libbpf_probes.c | 1 +
 2 files changed, 6 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5186b7710430..6e6ebef11ba3 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2646,6 +2646,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
 	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		return false;
 	case BPF_PROG_TYPE_KPROBE:
 	default:
@@ -3604,6 +3605,10 @@ static const struct {
 						BPF_CGROUP_UDP6_RECVMSG),
 	BPF_EAPROG_SEC("cgroup/sysctl",		BPF_PROG_TYPE_CGROUP_SYSCTL,
 						BPF_CGROUP_SYSCTL),
+	BPF_EAPROG_SEC("cgroup/getsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+						BPF_CGROUP_GETSOCKOPT),
+	BPF_EAPROG_SEC("cgroup/setsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
+						BPF_CGROUP_SETSOCKOPT),
 };
 
 #undef BPF_PROG_SEC_IMPL
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 6635a31a7a16..ace1a0708d99 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -101,6 +101,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
 	case BPF_PROG_TYPE_SK_REUSEPORT:
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	default:
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 47ac90bbce5b9f7c1c600a4c81fcfd78cb674798 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:50 -0700
Subject: selftests/bpf: test sockopt section name

Add tests that make sure libbpf section detection works.

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_section_names.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c
index dee2f2eceb0f..29833aeaf0de 100644
--- a/tools/testing/selftests/bpf/test_section_names.c
+++ b/tools/testing/selftests/bpf/test_section_names.c
@@ -134,6 +134,16 @@ static struct sec_name_test tests[] = {
 		{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
 		{0, BPF_CGROUP_SYSCTL},
 	},
+	{
+		"cgroup/getsockopt",
+		{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT},
+		{0, BPF_CGROUP_GETSOCKOPT},
+	},
+	{
+		"cgroup/setsockopt",
+		{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
+		{0, BPF_CGROUP_SETSOCKOPT},
+	},
 };
 
 static int test_prog_type_by_name(const struct sec_name_test *test)
-- 
cgit v1.2.3-59-g8ed1b


From 9ec8a4c9489dc6fac324f8adb76fc182be119389 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:51 -0700
Subject: selftests/bpf: add sockopt test

Add sockopt selftests:
* require proper expected_attach_type
* enforce context field read/write access
* test bpf_sockopt_handled handler
* test EPERM
* test limiting optlen from getsockopt
* test out-of-bounds access

v9:
* add tests for setsockopt argument mangling

v7:
* remove return 2; test retval=0 and optlen=-1

v3:
* use DW for optval{,_end} loads

v2:
* use return code 2 for kernel bypass

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore     |    1 +
 tools/testing/selftests/bpf/Makefile       |    3 +-
 tools/testing/selftests/bpf/test_sockopt.c | 1021 ++++++++++++++++++++++++++++
 3 files changed, 1024 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/test_sockopt.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 7470327edcfe..3fe92601223d 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -39,3 +39,4 @@ libbpf.so.*
 test_hashmap
 test_btf_dump
 xdping
+test_sockopt
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index faeb8220daa2..7f2210402e57 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-	test_btf_dump test_cgroup_attach xdping
+	test_btf_dump test_cgroup_attach xdping test_sockopt
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -103,6 +103,7 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
+$(OUTPUT)/test_sockopt: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/test_sockopt.c b/tools/testing/selftests/bpf/test_sockopt.c
new file mode 100644
index 000000000000..23bd0819382d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt.c
@@ -0,0 +1,1021 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH				"/sockopt"
+
+static char bpf_log_buf[4096];
+static bool verbose;
+
+enum sockopt_test_error {
+	OK = 0,
+	DENY_LOAD,
+	DENY_ATTACH,
+	EPERM_GETSOCKOPT,
+	EFAULT_GETSOCKOPT,
+	EPERM_SETSOCKOPT,
+	EFAULT_SETSOCKOPT,
+};
+
+static struct sockopt_test {
+	const char			*descr;
+	const struct bpf_insn		insns[64];
+	enum bpf_attach_type		attach_type;
+	enum bpf_attach_type		expected_attach_type;
+
+	int				set_optname;
+	int				set_level;
+	const char			set_optval[64];
+	socklen_t			set_optlen;
+
+	int				get_optname;
+	int				get_level;
+	const char			get_optval[64];
+	socklen_t			get_optlen;
+	socklen_t			get_optlen_ret;
+
+	enum sockopt_test_error		error;
+} tests[] = {
+
+	/* ==================== getsockopt ====================  */
+
+	{
+		.descr = "getsockopt: no expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = 0,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: wrong expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+		.error = DENY_ATTACH,
+	},
+	{
+		.descr = "getsockopt: bypass bpf hook",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: return EPERM from bpf hook",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.get_optname = IP_TOS,
+
+		.get_optlen = 1,
+		.error = EPERM_GETSOCKOPT,
+	},
+	{
+		.descr = "getsockopt: no optval bounds check, deny loading",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+
+			/* ctx->optval[0] = 0x80 */
+			BPF_MOV64_IMM(BPF_REG_0, 0x80),
+			BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: read ctx->level",
+		.insns = {
+			/* r6 = ctx->level */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, level)),
+
+			/* if (ctx->level == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = 123,
+
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->level",
+		.insns = {
+			/* ctx->level = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, level)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: read ctx->optname",
+		.insns = {
+			/* r6 = ctx->optname */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optname)),
+
+			/* if (ctx->optname == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optname = 123,
+
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: read ctx->retval",
+		.insns = {
+			/* r6 = ctx->retval */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.get_optname = IP_TOS,
+		.get_optlen = 1,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->optname",
+		.insns = {
+			/* ctx->optname = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optname)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: read ctx->optlen",
+		.insns = {
+			/* r6 = ctx->optlen */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* if (ctx->optlen == 64) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+	},
+	{
+		.descr = "getsockopt: deny bigger ctx->optlen",
+		.insns = {
+			/* ctx->optlen = 65 */
+			BPF_MOV64_IMM(BPF_REG_0, 65),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+
+		.error = EFAULT_GETSOCKOPT,
+	},
+	{
+		.descr = "getsockopt: deny arbitrary ctx->retval",
+		.insns = {
+			/* ctx->retval = 123 */
+			BPF_MOV64_IMM(BPF_REG_0, 123),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+
+		.error = EFAULT_GETSOCKOPT,
+	},
+	{
+		.descr = "getsockopt: support smaller ctx->optlen",
+		.insns = {
+			/* ctx->optlen = 32 */
+			BPF_MOV64_IMM(BPF_REG_0, 32),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_optlen = 64,
+		.get_optlen_ret = 32,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->optval",
+		.insns = {
+			/* ctx->optval = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: deny writing to ctx->optval_end",
+		.insns = {
+			/* ctx->optval_end = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval_end)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "getsockopt: rewrite value",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r2 = ctx->optval */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+			/* r6 = ctx->optval + 1 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+			/* r7 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+			/* ctx->optval[0] = 0xF0 */
+			BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0),
+			/* } */
+
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1*/
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_GETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.get_optname = IP_TOS,
+
+		.get_optval = { 0xF0 },
+		.get_optlen = 1,
+	},
+
+	/* ==================== setsockopt ====================  */
+
+	{
+		.descr = "setsockopt: no expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = 0,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: wrong expected_attach_type",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+		.error = DENY_ATTACH,
+	},
+	{
+		.descr = "setsockopt: bypass bpf hook",
+		.insns = {
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: return EPERM from bpf hook",
+		.insns = {
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_level = SOL_IP,
+		.set_optname = IP_TOS,
+
+		.set_optlen = 1,
+		.error = EPERM_SETSOCKOPT,
+	},
+	{
+		.descr = "setsockopt: no optval bounds check, deny loading",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+
+			/* r0 = ctx->optval[0] */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: read ctx->level",
+		.insns = {
+			/* r6 = ctx->level */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, level)),
+
+			/* if (ctx->level == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_level = 123,
+
+		.set_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: allow changing ctx->level",
+		.insns = {
+			/* ctx->level = SOL_IP */
+			BPF_MOV64_IMM(BPF_REG_0, SOL_IP),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, level)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = 234, /* should be rewritten to SOL_IP */
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: read ctx->optname",
+		.insns = {
+			/* r6 = ctx->optname */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optname)),
+
+			/* if (ctx->optname == 123) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4),
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optname = 123,
+
+		.set_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: allow changing ctx->optname",
+		.insns = {
+			/* ctx->optname = IP_TOS */
+			BPF_MOV64_IMM(BPF_REG_0, IP_TOS),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optname)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = 456, /* should be rewritten to IP_TOS */
+
+		.set_optval = { 1 << 3 },
+		.set_optlen = 1,
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: read ctx->optlen",
+		.insns = {
+			/* r6 = ctx->optlen */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* if (ctx->optlen == 64) { */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4),
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } else { */
+			/* return 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 64,
+	},
+	{
+		.descr = "setsockopt: ctx->optlen == -1 is ok",
+		.insns = {
+			/* ctx->optlen = -1 */
+			BPF_MOV64_IMM(BPF_REG_0, -1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 64,
+	},
+	{
+		.descr = "setsockopt: deny ctx->optlen < 0 (except -1)",
+		.insns = {
+			/* ctx->optlen = -2 */
+			BPF_MOV64_IMM(BPF_REG_0, -2),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 4,
+
+		.error = EFAULT_SETSOCKOPT,
+	},
+	{
+		.descr = "setsockopt: deny ctx->optlen > input optlen",
+		.insns = {
+			/* ctx->optlen = 65 */
+			BPF_MOV64_IMM(BPF_REG_0, 65),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.set_optlen = 64,
+
+		.error = EFAULT_SETSOCKOPT,
+	},
+	{
+		.descr = "setsockopt: allow changing ctx->optlen within bounds",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r2 = ctx->optval */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+			/* r6 = ctx->optval + 1 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+			/* r7 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+			/* ctx->optval[0] = 1 << 3 */
+			BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 1 << 3),
+			/* } */
+
+			/* ctx->optlen = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optlen)),
+
+			/* return 1*/
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 1, 1, 1, 1 },
+		.set_optlen = 4,
+		.get_optval = { 1 << 3 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: deny write ctx->retval",
+		.insns = {
+			/* ctx->retval = 0 */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: deny read ctx->retval",
+		.insns = {
+			/* r6 = ctx->retval */
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, retval)),
+
+			/* return 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: deny writing to ctx->optval",
+		.insns = {
+			/* ctx->optval = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: deny writing to ctx->optval_end",
+		.insns = {
+			/* ctx->optval_end = 1 */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct bpf_sockopt, optval_end)),
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.error = DENY_LOAD,
+	},
+	{
+		.descr = "setsockopt: allow IP_TOS <= 128",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r7 = ctx->optval + 1 */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1),
+
+			/* r8 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4),
+
+			/* r9 = ctx->optval[0] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0),
+
+			/* if (ctx->optval[0] < 128) */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } */
+
+			/* } else { */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 0x80 },
+		.set_optlen = 1,
+		.get_optval = { 0x80 },
+		.get_optlen = 1,
+	},
+	{
+		.descr = "setsockopt: deny IP_TOS > 128",
+		.insns = {
+			/* r6 = ctx->optval */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval)),
+			/* r7 = ctx->optval + 1 */
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1),
+
+			/* r8 = ctx->optval_end */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1,
+				    offsetof(struct bpf_sockopt, optval_end)),
+
+			/* if (ctx->optval + 1 <= ctx->optval_end) { */
+			BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4),
+
+			/* r9 = ctx->optval[0] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0),
+
+			/* if (ctx->optval[0] < 128) */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+			/* } */
+
+			/* } else { */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* } */
+
+			BPF_EXIT_INSN(),
+		},
+		.attach_type = BPF_CGROUP_SETSOCKOPT,
+		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+		.get_level = SOL_IP,
+		.set_level = SOL_IP,
+
+		.get_optname = IP_TOS,
+		.set_optname = IP_TOS,
+
+		.set_optval = { 0x81 },
+		.set_optlen = 1,
+		.get_optval = { 0x00 },
+		.get_optlen = 1,
+
+		.error = EPERM_SETSOCKOPT,
+	},
+};
+
+static int load_prog(const struct bpf_insn *insns,
+		     enum bpf_attach_type expected_attach_type)
+{
+	struct bpf_load_program_attr attr = {
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+		.expected_attach_type = expected_attach_type,
+		.insns = insns,
+		.license = "GPL",
+		.log_level = 2,
+	};
+	int fd;
+
+	for (;
+	     insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT);
+	     attr.insns_cnt++) {
+	}
+	attr.insns_cnt++;
+
+	fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf));
+	if (verbose && fd < 0)
+		fprintf(stderr, "%s\n", bpf_log_buf);
+
+	return fd;
+}
+
+static int run_test(int cgroup_fd, struct sockopt_test *test)
+{
+	int sock_fd, err, prog_fd;
+	void *optval = NULL;
+	int ret = 0;
+
+	prog_fd = load_prog(test->insns, test->expected_attach_type);
+	if (prog_fd < 0) {
+		if (test->error == DENY_LOAD)
+			return 0;
+
+		log_err("Failed to load BPF program");
+		return -1;
+	}
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+	if (err < 0) {
+		if (test->error == DENY_ATTACH)
+			goto close_prog_fd;
+
+		log_err("Failed to attach BPF program");
+		ret = -1;
+		goto close_prog_fd;
+	}
+
+	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		log_err("Failed to create AF_INET socket");
+		ret = -1;
+		goto detach_prog;
+	}
+
+	if (test->set_optlen) {
+		err = setsockopt(sock_fd, test->set_level, test->set_optname,
+				 test->set_optval, test->set_optlen);
+		if (err) {
+			if (errno == EPERM && test->error == EPERM_SETSOCKOPT)
+				goto close_sock_fd;
+			if (errno == EFAULT && test->error == EFAULT_SETSOCKOPT)
+				goto free_optval;
+
+			log_err("Failed to call setsockopt");
+			ret = -1;
+			goto close_sock_fd;
+		}
+	}
+
+	if (test->get_optlen) {
+		optval = malloc(test->get_optlen);
+		socklen_t optlen = test->get_optlen;
+		socklen_t expected_get_optlen = test->get_optlen_ret ?:
+			test->get_optlen;
+
+		err = getsockopt(sock_fd, test->get_level, test->get_optname,
+				 optval, &optlen);
+		if (err) {
+			if (errno == EPERM && test->error == EPERM_GETSOCKOPT)
+				goto free_optval;
+			if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT)
+				goto free_optval;
+
+			log_err("Failed to call getsockopt");
+			ret = -1;
+			goto free_optval;
+		}
+
+		if (optlen != expected_get_optlen) {
+			errno = 0;
+			log_err("getsockopt returned unexpected optlen");
+			ret = -1;
+			goto free_optval;
+		}
+
+		if (memcmp(optval, test->get_optval, optlen) != 0) {
+			errno = 0;
+			log_err("getsockopt returned unexpected optval");
+			ret = -1;
+			goto free_optval;
+		}
+	}
+
+	ret = test->error != OK;
+
+free_optval:
+	free(optval);
+close_sock_fd:
+	close(sock_fd);
+detach_prog:
+	bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+close_prog_fd:
+	close(prog_fd);
+	return ret;
+}
+
+int main(int args, char **argv)
+{
+	int err = EXIT_FAILURE, error_cnt = 0;
+	int cgroup_fd, i;
+
+	if (setup_cgroup_environment())
+		goto cleanup_obj;
+
+	cgroup_fd = create_and_get_cgroup(CG_PATH);
+	if (cgroup_fd < 0)
+		goto cleanup_cgroup_env;
+
+	if (join_cgroup(CG_PATH))
+		goto cleanup_cgroup;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		int err = run_test(cgroup_fd, &tests[i]);
+
+		if (err)
+			error_cnt++;
+
+		printf("#%d %s: %s\n", i, err ? "FAIL" : "PASS",
+		       tests[i].descr);
+	}
+
+	printf("Summary: %ld PASSED, %d FAILED\n",
+	       ARRAY_SIZE(tests) - error_cnt, error_cnt);
+	err = error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+
+cleanup_cgroup:
+	close(cgroup_fd);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+cleanup_obj:
+	return err;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 8a027dc0d8f5d421eba953a7116aea991459b202 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:52 -0700
Subject: selftests/bpf: add sockopt test that exercises sk helpers

socktop test that introduces new SOL_CUSTOM sockopt level and
stores whatever users sets in sk storage. Whenever getsockopt
is called, the original value is retrieved.

v9:
* SO_SNDBUF example to override user-supplied buffer

v7:
* use retval=0 and optlen-1

v6:
* test 'ret=1' use-case as well (Alexei Starovoitov)

v4:
* don't call bpf_sk_fullsock helper

v3:
* drop (__u8 *)(long) casts for optval{,_end}

v2:
* new test

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore         |   1 +
 tools/testing/selftests/bpf/Makefile           |   3 +-
 tools/testing/selftests/bpf/progs/sockopt_sk.c | 111 +++++++++++++
 tools/testing/selftests/bpf/test_sockopt_sk.c  | 214 +++++++++++++++++++++++++
 4 files changed, 328 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/sockopt_sk.c
 create mode 100644 tools/testing/selftests/bpf/test_sockopt_sk.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3fe92601223d..8ac076c311d4 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -40,3 +40,4 @@ test_hashmap
 test_btf_dump
 xdping
 test_sockopt
+test_sockopt_sk
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7f2210402e57..57ae4e168ace 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-	test_btf_dump test_cgroup_attach xdping test_sockopt
+	test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -104,6 +104,7 @@ $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
 $(OUTPUT)/test_sockopt: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
new file mode 100644
index 000000000000..076122c898e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+#define SOL_CUSTOM			0xdeadbeef
+
+struct sockopt_sk {
+	__u8 val;
+};
+
+struct bpf_map_def SEC("maps") socket_storage_map = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct sockopt_sk),
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk);
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+	struct sockopt_sk *storage;
+
+	if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+		/* Not interested in SOL_IP:IP_TOS;
+		 * let next BPF program in the cgroup chain or kernel
+		 * handle it.
+		 */
+		return 1;
+
+	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
+		/* Not interested in SOL_SOCKET:SO_SNDBUF;
+		 * let next BPF program in the cgroup chain or kernel
+		 * handle it.
+		 */
+		return 1;
+	}
+
+	if (ctx->level != SOL_CUSTOM)
+		return 0; /* EPERM, deny everything except custom level */
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0; /* EPERM, couldn't get sk storage */
+
+	if (!ctx->retval)
+		return 0; /* EPERM, kernel should not have handled
+			   * SOL_CUSTOM, something is wrong!
+			   */
+	ctx->retval = 0; /* Reset system call return value to zero */
+
+	optval[0] = storage->val;
+	ctx->optlen = 1;
+
+	return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+	struct sockopt_sk *storage;
+
+	if (ctx->level == SOL_IP && ctx->optname == IP_TOS)
+		/* Not interested in SOL_IP:IP_TOS;
+		 * let next BPF program in the cgroup chain or kernel
+		 * handle it.
+		 */
+		return 1;
+
+	if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
+		/* Overwrite SO_SNDBUF value */
+
+		if (optval + sizeof(__u32) > optval_end)
+			return 0; /* EPERM, bounds check */
+
+		*(__u32 *)optval = 0x55AA;
+		ctx->optlen = 4;
+
+		return 1;
+	}
+
+	if (ctx->level != SOL_CUSTOM)
+		return 0; /* EPERM, deny everything except custom level */
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0; /* EPERM, couldn't get sk storage */
+
+	storage->val = optval[0];
+	ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
+			   * setsockopt handler.
+			   */
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/test_sockopt_sk.c
new file mode 100644
index 000000000000..12e79ed075ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_sk.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH				"/sockopt"
+
+#define SOL_CUSTOM			0xdeadbeef
+
+static int getsetsockopt(void)
+{
+	int fd, err;
+	char buf[4] = {};
+	socklen_t optlen;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create socket");
+		return -1;
+	}
+
+	/* IP_TOS - BPF bypass */
+
+	buf[0] = 0x08;
+	err = setsockopt(fd, SOL_IP, IP_TOS, buf, 1);
+	if (err) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto err;
+	}
+
+	buf[0] = 0x00;
+	optlen = 1;
+	err = getsockopt(fd, SOL_IP, IP_TOS, buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto err;
+	}
+
+	if (buf[0] != 0x08) {
+		log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08",
+			buf[0]);
+		goto err;
+	}
+
+	/* IP_TTL - EPERM */
+
+	buf[0] = 1;
+	err = setsockopt(fd, SOL_IP, IP_TTL, buf, 1);
+	if (!err || errno != EPERM) {
+		log_err("Unexpected success from setsockopt(IP_TTL)");
+		goto err;
+	}
+
+	/* SOL_CUSTOM - handled by BPF */
+
+	buf[0] = 0x01;
+	err = setsockopt(fd, SOL_CUSTOM, 0, buf, 1);
+	if (err) {
+		log_err("Failed to call setsockopt");
+		goto err;
+	}
+
+	buf[0] = 0x00;
+	optlen = 4;
+	err = getsockopt(fd, SOL_CUSTOM, 0, buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt");
+		goto err;
+	}
+
+	if (optlen != 1) {
+		log_err("Unexpected optlen %d != 1", optlen);
+		goto err;
+	}
+	if (buf[0] != 0x01) {
+		log_err("Unexpected buf[0] 0x%02x != 0x01", buf[0]);
+		goto err;
+	}
+
+	/* SO_SNDBUF is overwritten */
+
+	buf[0] = 0x01;
+	buf[1] = 0x01;
+	buf[2] = 0x01;
+	buf[3] = 0x01;
+	err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, 4);
+	if (err) {
+		log_err("Failed to call setsockopt(SO_SNDBUF)");
+		goto err;
+	}
+
+	buf[0] = 0x00;
+	buf[1] = 0x00;
+	buf[2] = 0x00;
+	buf[3] = 0x00;
+	optlen = 4;
+	err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(SO_SNDBUF)");
+		goto err;
+	}
+
+	if (*(__u32 *)buf != 0x55AA*2) {
+		log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2",
+			*(__u32 *)buf);
+		goto err;
+	}
+
+	close(fd);
+	return 0;
+err:
+	close(fd);
+	return -1;
+}
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+	enum bpf_attach_type attach_type;
+	enum bpf_prog_type prog_type;
+	struct bpf_program *prog;
+	int err;
+
+	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+	if (err) {
+		log_err("Failed to deduct types for %s BPF program", title);
+		return -1;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, title);
+	if (!prog) {
+		log_err("Failed to find %s BPF program", title);
+		return -1;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+			      attach_type, 0);
+	if (err) {
+		log_err("Failed to attach %s BPF program", title);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int run_test(int cgroup_fd)
+{
+	struct bpf_prog_load_attr attr = {
+		.file = "./sockopt_sk.o",
+	};
+	struct bpf_object *obj;
+	int ignored;
+	int err;
+
+	err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
+	if (err)
+		goto close_bpf_object;
+
+	err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
+	if (err)
+		goto close_bpf_object;
+
+	err = getsetsockopt();
+
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+int main(int args, char **argv)
+{
+	int cgroup_fd;
+	int err = EXIT_SUCCESS;
+
+	if (setup_cgroup_environment())
+		goto cleanup_obj;
+
+	cgroup_fd = create_and_get_cgroup(CG_PATH);
+	if (cgroup_fd < 0)
+		goto cleanup_cgroup_env;
+
+	if (join_cgroup(CG_PATH))
+		goto cleanup_cgroup;
+
+	if (run_test(cgroup_fd))
+		err = EXIT_FAILURE;
+
+	printf("test_sockopt_sk: %s\n",
+	       err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+	close(cgroup_fd);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+cleanup_obj:
+	return err;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 65b4414a05ebf51966c4a28a2a1b156ab5a01b0f Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:53 -0700
Subject: selftests/bpf: add sockopt test that exercises BPF_F_ALLOW_MULTI

sockopt test that verifies chaining behavior.

v9:
* setsockopt chaining example

v7:
* rework the test to verify cgroup getsockopt chaining

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore            |   1 +
 tools/testing/selftests/bpf/Makefile              |   4 +-
 tools/testing/selftests/bpf/progs/sockopt_multi.c |  71 ++++
 tools/testing/selftests/bpf/test_sockopt_multi.c  | 374 ++++++++++++++++++++++
 4 files changed, 449 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/sockopt_multi.c
 create mode 100644 tools/testing/selftests/bpf/test_sockopt_multi.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 8ac076c311d4..a2f7f79c7908 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -41,3 +41,4 @@ test_btf_dump
 xdping
 test_sockopt
 test_sockopt_sk
+test_sockopt_multi
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 57ae4e168ace..de1754a8f5fe 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -26,7 +26,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
-	test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk
+	test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
+	test_sockopt_multi
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -105,6 +106,7 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
 $(OUTPUT)/test_sockopt: cgroup_helpers.c
 $(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
+$(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
new file mode 100644
index 000000000000..4afd2595c08e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+SEC("cgroup/getsockopt/child")
+int _getsockopt_child(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+		return 1;
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	if (optval[0] != 0x80)
+		return 0; /* EPERM, unexpected optval from the kernel */
+
+	ctx->retval = 0; /* Reset system call return value to zero */
+
+	optval[0] = 0x90;
+	ctx->optlen = 1;
+
+	return 1;
+}
+
+SEC("cgroup/getsockopt/parent")
+int _getsockopt_parent(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+		return 1;
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	if (optval[0] != 0x90)
+		return 0; /* EPERM, unexpected optval from the kernel */
+
+	ctx->retval = 0; /* Reset system call return value to zero */
+
+	optval[0] = 0xA0;
+	ctx->optlen = 1;
+
+	return 1;
+}
+
+SEC("cgroup/setsockopt")
+int _setsockopt(struct bpf_sockopt *ctx)
+{
+	__u8 *optval_end = ctx->optval_end;
+	__u8 *optval = ctx->optval;
+
+	if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
+		return 1;
+
+	if (optval + 1 > optval_end)
+		return 0; /* EPERM, bounds check */
+
+	optval[0] += 0x10;
+	ctx->optlen = 1;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/test_sockopt_multi.c b/tools/testing/selftests/bpf/test_sockopt_multi.c
new file mode 100644
index 000000000000..4be3441db867
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockopt_multi.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+	enum bpf_attach_type attach_type;
+	enum bpf_prog_type prog_type;
+	struct bpf_program *prog;
+	int err;
+
+	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+	if (err) {
+		log_err("Failed to deduct types for %s BPF program", title);
+		return -1;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, title);
+	if (!prog) {
+		log_err("Failed to find %s BPF program", title);
+		return -1;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
+			      attach_type, BPF_F_ALLOW_MULTI);
+	if (err) {
+		log_err("Failed to attach %s BPF program", title);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title)
+{
+	enum bpf_attach_type attach_type;
+	enum bpf_prog_type prog_type;
+	struct bpf_program *prog;
+	int err;
+
+	err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
+	if (err)
+		return -1;
+
+	prog = bpf_object__find_program_by_title(obj, title);
+	if (!prog)
+		return -1;
+
+	err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd,
+			       attach_type);
+	if (err)
+		return -1;
+
+	return 0;
+}
+
+static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
+			       int cg_child, int sock_fd)
+{
+	socklen_t optlen;
+	__u8 buf;
+	int err;
+
+	/* Set IP_TOS to the expected value (0x80). */
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach child program and make sure it returns new value:
+	 * - kernel:      -> 0x80
+	 * - child:  0x80 -> 0x90
+	 */
+
+	err = prog_attach(obj, cg_child, "cgroup/getsockopt/child");
+	if (err)
+		goto detach;
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x90) {
+		log_err("Unexpected getsockopt 0x%x != 0x90", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach parent program and make sure it returns new value:
+	 * - kernel:      -> 0x80
+	 * - child:  0x80 -> 0x90
+	 * - parent: 0x90 -> 0xA0
+	 */
+
+	err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent");
+	if (err)
+		goto detach;
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0xA0) {
+		log_err("Unexpected getsockopt 0x%x != 0xA0", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Setting unexpected initial sockopt should return EPERM:
+	 * - kernel: -> 0x40
+	 * - child:  unexpected 0x40, EPERM
+	 * - parent: unexpected 0x40, EPERM
+	 */
+
+	buf = 0x40;
+	if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (!err) {
+		log_err("Unexpected success from getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	/* Detach child program and make sure we still get EPERM:
+	 * - kernel: -> 0x40
+	 * - parent: unexpected 0x40, EPERM
+	 */
+
+	err = prog_detach(obj, cg_child, "cgroup/getsockopt/child");
+	if (err) {
+		log_err("Failed to detach child program");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (!err) {
+		log_err("Unexpected success from getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	/* Set initial value to the one the parent program expects:
+	 * - kernel:      -> 0x90
+	 * - parent: 0x90 -> 0xA0
+	 */
+
+	buf = 0x90;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0xA0) {
+		log_err("Unexpected getsockopt 0x%x != 0xA0", buf);
+		err = -1;
+		goto detach;
+	}
+
+detach:
+	prog_detach(obj, cg_child, "cgroup/getsockopt/child");
+	prog_detach(obj, cg_parent, "cgroup/getsockopt/parent");
+
+	return err;
+}
+
+static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
+			       int cg_child, int sock_fd)
+{
+	socklen_t optlen;
+	__u8 buf;
+	int err;
+
+	/* Set IP_TOS to the expected value (0x80). */
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach child program and make sure it adds 0x10. */
+
+	err = prog_attach(obj, cg_child, "cgroup/setsockopt");
+	if (err)
+		goto detach;
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80 + 0x10) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 + 0x10", buf);
+		err = -1;
+		goto detach;
+	}
+
+	/* Attach parent program and make sure it adds another 0x10. */
+
+	err = prog_attach(obj, cg_parent, "cgroup/setsockopt");
+	if (err)
+		goto detach;
+
+	buf = 0x80;
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
+		log_err("Failed to call setsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	buf = 0x00;
+	optlen = 1;
+	err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+	if (err) {
+		log_err("Failed to call getsockopt(IP_TOS)");
+		goto detach;
+	}
+
+	if (buf != 0x80 + 2 * 0x10) {
+		log_err("Unexpected getsockopt 0x%x != 0x80 + 2 * 0x10", buf);
+		err = -1;
+		goto detach;
+	}
+
+detach:
+	prog_detach(obj, cg_child, "cgroup/setsockopt");
+	prog_detach(obj, cg_parent, "cgroup/setsockopt");
+
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	struct bpf_prog_load_attr attr = {
+		.file = "./sockopt_multi.o",
+	};
+	int cg_parent = -1, cg_child = -1;
+	struct bpf_object *obj = NULL;
+	int sock_fd = -1;
+	int err = -1;
+	int ignored;
+
+	if (setup_cgroup_environment()) {
+		log_err("Failed to setup cgroup environment\n");
+		goto out;
+	}
+
+	cg_parent = create_and_get_cgroup("/parent");
+	if (cg_parent < 0) {
+		log_err("Failed to create cgroup /parent\n");
+		goto out;
+	}
+
+	cg_child = create_and_get_cgroup("/parent/child");
+	if (cg_child < 0) {
+		log_err("Failed to create cgroup /parent/child\n");
+		goto out;
+	}
+
+	if (join_cgroup("/parent/child")) {
+		log_err("Failed to join cgroup /parent/child\n");
+		goto out;
+	}
+
+	err = bpf_prog_load_xattr(&attr, &obj, &ignored);
+	if (err) {
+		log_err("Failed to load BPF object");
+		goto out;
+	}
+
+	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		log_err("Failed to create socket");
+		goto out;
+	}
+
+	if (run_getsockopt_test(obj, cg_parent, cg_child, sock_fd))
+		err = -1;
+	printf("test_sockopt_multi: getsockopt %s\n",
+	       err ? "FAILED" : "PASSED");
+
+	if (run_setsockopt_test(obj, cg_parent, cg_child, sock_fd))
+		err = -1;
+	printf("test_sockopt_multi: setsockopt %s\n",
+	       err ? "FAILED" : "PASSED");
+
+out:
+	close(sock_fd);
+	bpf_object__close(obj);
+	close(cg_child);
+	close(cg_parent);
+
+	printf("test_sockopt_multi: %s\n", err ? "FAILED" : "PASSED");
+	return err ? EXIT_FAILURE : EXIT_SUCCESS;
+}
-- 
cgit v1.2.3-59-g8ed1b


From f6d08d9d8543c8ee494b307804b28e2750ffedb9 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 13:38:55 -0700
Subject: bpftool: support cgroup sockopt

Support sockopt prog type and cgroup hooks in the bpftool.

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Martin Lau <kafai@fb.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 7 +++++--
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   | 3 ++-
 tools/bpf/bpftool/bash-completion/bpftool          | 9 ++++++---
 tools/bpf/bpftool/cgroup.c                         | 5 ++++-
 tools/bpf/bpftool/main.h                           | 1 +
 tools/bpf/bpftool/prog.c                           | 3 ++-
 6 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index d80fdde79c22..585f270c2d25 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -29,7 +29,8 @@ CGROUP COMMANDS
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 |	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
 |		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-|		**sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** }
+|		**sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** |
+|		**getsockopt** | **setsockopt** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -90,7 +91,9 @@ DESCRIPTION
                   an unconnected udp4 socket (since 5.2);
 		  **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for
                   an unconnected udp6 socket (since 5.2);
-		  **sysctl** sysctl access (since 5.2).
+		  **sysctl** sysctl access (since 5.2);
+		  **getsockopt** call to getsockopt (since 5.3);
+		  **setsockopt** call to setsockopt (since 5.3).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 55dd06517a3b..1df637f85f94 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -40,7 +40,8 @@ PROG COMMANDS
 |		**lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
-|		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl**
+|		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
+|		**cgroup/getsockopt** | **cgroup/setsockopt**
 |	}
 |       *ATTACH_TYPE* := {
 |		**msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a17e84c67498..ba37095e1f62 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -379,7 +379,8 @@ _bpftool()
                                 cgroup/sendmsg4 cgroup/sendmsg6 \
                                 cgroup/recvmsg4 cgroup/recvmsg6 \
                                 cgroup/post_bind4 cgroup/post_bind6 \
-                                cgroup/sysctl" -- \
+                                cgroup/sysctl cgroup/getsockopt \
+                                cgroup/setsockopt" -- \
                                                    "$cur" ) )
                             return 0
                             ;;
@@ -689,7 +690,8 @@ _bpftool()
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
                         device bind4 bind6 post_bind4 post_bind6 connect4 \
-                        connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl'
+                        connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
+                        getsockopt setsockopt'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag'
                     case $prev in
@@ -699,7 +701,8 @@ _bpftool()
                             ;;
                         ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
                         post_bind4|post_bind6|connect4|connect6|sendmsg4|\
-                        sendmsg6|recvmsg4|recvmsg6|sysctl)
+                        sendmsg6|recvmsg4|recvmsg6|sysctl|getsockopt|\
+                        setsockopt)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 73ec8ea33fb4..390b89a224f1 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -26,7 +26,8 @@
 	"                        sock_ops | device | bind4 | bind6 |\n"	       \
 	"                        post_bind4 | post_bind6 | connect4 |\n"       \
 	"                        connect6 | sendmsg4 | sendmsg6 |\n"           \
-	"                        recvmsg4 | recvmsg6 | sysctl }"
+	"                        recvmsg4 | recvmsg6 | sysctl |\n"	       \
+	"                        getsockopt | setsockopt }"
 
 static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
@@ -45,6 +46,8 @@ static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_SYSCTL] = "sysctl",
 	[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
 	[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+	[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+	[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 28a2a5857e14..9c5d9c80f71e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -74,6 +74,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
 	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
 	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
+	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
 };
 
 extern const char * const map_type_name[];
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f1a831f05010..9b0db5d14e31 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1071,7 +1071,8 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
 		"                 cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
-		"                 cgroup/recvmsg6 }\n"
+		"                 cgroup/recvmsg6 | cgroup/getsockopt |\n"
+		"                 cgroup/setsockopt }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
-- 
cgit v1.2.3-59-g8ed1b


From 2d6dbb9a65f4001f2878512078394c11301994f3 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 27 Jun 2019 18:12:33 -0700
Subject: selftests/bpf: fix -Wstrict-aliasing in test_sockopt_sk.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Let's use union with u8[4] and u32 members for sockopt buffer,
that should fix any possible aliasing issues.

test_sockopt_sk.c: In function ‘getsetsockopt’:
test_sockopt_sk.c:115:2: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing]
  if (*(__u32 *)buf != 0x55AA*2) {
  ^~
test_sockopt_sk.c:116:3: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing]
   log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2",
   ^~~~~~~

Fixes: 8a027dc0d8f5 ("selftests/bpf: add sockopt test that exercises sk helpers")
Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_sockopt_sk.c | 51 +++++++++++++--------------
 1 file changed, 24 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/test_sockopt_sk.c
index 12e79ed075ce..036b652e5ca9 100644
--- a/tools/testing/selftests/bpf/test_sockopt_sk.c
+++ b/tools/testing/selftests/bpf/test_sockopt_sk.c
@@ -22,7 +22,10 @@
 static int getsetsockopt(void)
 {
 	int fd, err;
-	char buf[4] = {};
+	union {
+		char u8[4];
+		__u32 u32;
+	} buf = {};
 	socklen_t optlen;
 
 	fd = socket(AF_INET, SOCK_STREAM, 0);
@@ -33,31 +36,31 @@ static int getsetsockopt(void)
 
 	/* IP_TOS - BPF bypass */
 
-	buf[0] = 0x08;
-	err = setsockopt(fd, SOL_IP, IP_TOS, buf, 1);
+	buf.u8[0] = 0x08;
+	err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1);
 	if (err) {
 		log_err("Failed to call setsockopt(IP_TOS)");
 		goto err;
 	}
 
-	buf[0] = 0x00;
+	buf.u8[0] = 0x00;
 	optlen = 1;
-	err = getsockopt(fd, SOL_IP, IP_TOS, buf, &optlen);
+	err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen);
 	if (err) {
 		log_err("Failed to call getsockopt(IP_TOS)");
 		goto err;
 	}
 
-	if (buf[0] != 0x08) {
+	if (buf.u8[0] != 0x08) {
 		log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08",
-			buf[0]);
+			buf.u8[0]);
 		goto err;
 	}
 
 	/* IP_TTL - EPERM */
 
-	buf[0] = 1;
-	err = setsockopt(fd, SOL_IP, IP_TTL, buf, 1);
+	buf.u8[0] = 1;
+	err = setsockopt(fd, SOL_IP, IP_TTL, &buf, 1);
 	if (!err || errno != EPERM) {
 		log_err("Unexpected success from setsockopt(IP_TTL)");
 		goto err;
@@ -65,16 +68,16 @@ static int getsetsockopt(void)
 
 	/* SOL_CUSTOM - handled by BPF */
 
-	buf[0] = 0x01;
-	err = setsockopt(fd, SOL_CUSTOM, 0, buf, 1);
+	buf.u8[0] = 0x01;
+	err = setsockopt(fd, SOL_CUSTOM, 0, &buf, 1);
 	if (err) {
 		log_err("Failed to call setsockopt");
 		goto err;
 	}
 
-	buf[0] = 0x00;
+	buf.u32 = 0x00;
 	optlen = 4;
-	err = getsockopt(fd, SOL_CUSTOM, 0, buf, &optlen);
+	err = getsockopt(fd, SOL_CUSTOM, 0, &buf, &optlen);
 	if (err) {
 		log_err("Failed to call getsockopt");
 		goto err;
@@ -84,37 +87,31 @@ static int getsetsockopt(void)
 		log_err("Unexpected optlen %d != 1", optlen);
 		goto err;
 	}
-	if (buf[0] != 0x01) {
-		log_err("Unexpected buf[0] 0x%02x != 0x01", buf[0]);
+	if (buf.u8[0] != 0x01) {
+		log_err("Unexpected buf[0] 0x%02x != 0x01", buf.u8[0]);
 		goto err;
 	}
 
 	/* SO_SNDBUF is overwritten */
 
-	buf[0] = 0x01;
-	buf[1] = 0x01;
-	buf[2] = 0x01;
-	buf[3] = 0x01;
-	err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, 4);
+	buf.u32 = 0x01010101;
+	err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, 4);
 	if (err) {
 		log_err("Failed to call setsockopt(SO_SNDBUF)");
 		goto err;
 	}
 
-	buf[0] = 0x00;
-	buf[1] = 0x00;
-	buf[2] = 0x00;
-	buf[3] = 0x00;
+	buf.u32 = 0x00;
 	optlen = 4;
-	err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, &optlen);
+	err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, &optlen);
 	if (err) {
 		log_err("Failed to call getsockopt(SO_SNDBUF)");
 		goto err;
 	}
 
-	if (*(__u32 *)buf != 0x55AA*2) {
+	if (buf.u32 != 0x55AA*2) {
 		log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2",
-			*(__u32 *)buf);
+			buf.u32);
 		goto err;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 0472301a28f6cf53a6bc5783e48a2d0bbff4682f Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Fri, 28 Jun 2019 07:08:45 +0300
Subject: bpf: fix uapi bpf_prog_info fields alignment

Merge commit 1c8c5a9d38f60 ("Merge
git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next") undid the
fix from commit 36f9814a494 ("bpf: fix uapi hole for 32 bit compat
applications") by taking the gpl_compatible 1-bit field definition from
commit b85fab0e67b162 ("bpf: Add gpl_compatible flag to struct
bpf_prog_info") as is. That breaks architectures with 16-bit alignment
like m68k. Add 31-bit pad after gpl_compatible to restore alignment of
following fields.

Thanks to Dmitry V. Levin his analysis of this bug history.

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h       | 1 +
 tools/include/uapi/linux/bpf.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a8b823c30b43..29a5bc3d5c66 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3143,6 +3143,7 @@ struct bpf_prog_info {
 	char name[BPF_OBJ_NAME_LEN];
 	__u32 ifindex;
 	__u32 gpl_compatible:1;
+	__u32 :31; /* alignment pad */
 	__u64 netns_dev;
 	__u64 netns_ino;
 	__u32 nr_jited_ksyms;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a8b823c30b43..29a5bc3d5c66 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3143,6 +3143,7 @@ struct bpf_prog_info {
 	char name[BPF_OBJ_NAME_LEN];
 	__u32 ifindex;
 	__u32 gpl_compatible:1;
+	__u32 :31; /* alignment pad */
 	__u64 netns_dev;
 	__u64 netns_ino;
 	__u32 nr_jited_ksyms;
-- 
cgit v1.2.3-59-g8ed1b


From 3099c59db0f2d6b85389af5ac652569bac3f1ee8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 27 Jun 2019 17:12:42 +0200
Subject: selftests: rtnetlink: skip ipsec offload tests if netdevsim isn't
 present

running the script on systems without netdevsim now prints:

SKIP: ipsec_offload can't load netdevsim

instead of error message & failed status.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/rtnetlink.sh | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 505628884783..bdbf4b3125b6 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -738,13 +738,17 @@ kci_test_ipsec_offload()
 	sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
 	sysfsf=$sysfsd/ipsec
 	sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+	probed=false
 
 	# setup netdevsim since dummydev doesn't have offload support
-	modprobe netdevsim
-	check_err $?
-	if [ $ret -ne 0 ]; then
-		echo "FAIL: ipsec_offload can't load netdevsim"
-		return 1
+	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+		modprobe -q netdevsim
+		check_err $?
+		if [ $ret -ne 0 ]; then
+			echo "SKIP: ipsec_offload can't load netdevsim"
+			return $ksft_skip
+		fi
+		probed=true
 	fi
 
 	echo "0" > /sys/bus/netdevsim/new_device
@@ -824,7 +828,7 @@ EOF
 	fi
 
 	# clean up any leftovers
-	rmmod netdevsim
+	$probed && rmmod netdevsim
 
 	if [ $ret -ne 0 ]; then
 		echo "FAIL: ipsec_offload"
-- 
cgit v1.2.3-59-g8ed1b


From 95b9395ba103ec0cc40bebb71a08231b5e226a76 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 28 Jun 2019 11:03:43 -0700
Subject: selftests: add a test case for cls_lower handle overflow

Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/filters/tests.json  | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index e2f92cefb8d5..16559c436f21 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -38,6 +38,25 @@
             "$TC qdisc del dev $DEV1 clsact"
         ]
     },
+    {
+        "id": "2ff3",
+        "name": "Add flower with max handle and then dump it",
+        "category": [
+            "filter",
+            "flower"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV2 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 parent ffff: handle 0xffffffff flower action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV2 ingress",
+        "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV2 ingress"
+        ]
+    },
     {
         "id": "d052",
         "name": "Add 1M filters with the same action",
-- 
cgit v1.2.3-59-g8ed1b


From 5d15a8ec2aebbb86b9db98ee98d6f9765168d7a6 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Fri, 28 Jun 2019 14:30:18 -0400
Subject: tc-testing: updated mirred action tests with batch create/delete

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/mirred.json        | 94 ++++++++++++++++++++++
 1 file changed, 94 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 6e5fb3d25681..2232b21e2510 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -459,5 +459,99 @@
         "teardown": [
             "$TC actions flush action mirred"
         ]
+    },
+    {
+        "id": "4749",
+        "name": "Add batch of 32 mirred redirect egress actions with cookie",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred egress redirect dev lo index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "5c69",
+        "name": "Delete batch of 32 mirred redirect egress actions",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred egress redirect dev lo index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "d3c0",
+        "name": "Add batch of 32 mirred mirror ingress actions with cookie",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred ingress mirror dev lo index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "e684",
+        "name": "Delete batch of 32 mirred mirror ingress actions",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred ingress mirror dev lo index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action mirred index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
     }
 ]
-- 
cgit v1.2.3-59-g8ed1b


From a8488b702648ee6a5f391e2127e98e69ccca5b45 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Fri, 28 Jun 2019 17:32:01 -0400
Subject: tc-testing: added tdc tests for prio qdisc

Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/qdiscs/prio.json | 276 +++++++++++++++++++++
 1 file changed, 276 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
new file mode 100644
index 000000000000..9c792fa8ca23
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
@@ -0,0 +1,276 @@
+[
+    {
+        "id": "ddd9",
+        "name": "Add prio qdisc on egress",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "aa71",
+        "name": "Add prio qdisc on egress with handle of maximum value",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle ffff: prio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio ffff: root",
+        "matchCount": "1",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "db37",
+        "name": "Add prio qdisc on egress with invalid handle exceeding maximum value",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 10000: prio",
+        "expExitCode": "255",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 10000: root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "39d8",
+        "name": "Add prio qdisc on egress with unsupported argument",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio foorbar",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "5769",
+        "name": "Add prio qdisc on egress with 4 bands and new priomap",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "fe0f",
+        "name": "Add prio qdisc on egress with 4 bands and priomap exceeding TC_PRIO_MAX entries",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "1f91",
+        "name": "Add prio qdisc on egress with 4 bands and priomap's values exceeding bands number",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "d248",
+        "name": "Add prio qdisc on egress with invalid bands value (< 2)",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 1 priomap.*0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "1d0e",
+        "name": "Add prio qdisc on egress with invalid bands value exceeding TCQ_PRIO_BANDS",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 1024 priomap.*1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "1971",
+        "name": "Replace default prio qdisc on egress with 8 bands and new priomap",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 handle 1: root prio"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DEV1 handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root.*bands 8 priomap.*1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "d88a",
+        "name": "Add duplicate prio qdisc on egress",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 handle 1: root prio"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 handle 1: root prio",
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "5948",
+        "name": "Delete nonexistent prio qdisc",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 root handle 1: prio",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "6c0a",
+        "name": "Add prio qdisc on egress with invalid format for handles",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 123^ prio",
+        "expExitCode": "255",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc prio 123 root",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    },
+    {
+        "id": "0175",
+        "name": "Delete prio qdisc twice",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DEV1 type dummy || /bin/true",
+            "$TC qdisc add dev $DEV1 root handle 1: prio",
+            "$TC qdisc del dev $DEV1 root handle 1: prio"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DEV1 handle 1: root prio",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DEV1",
+        "matchPattern": "qdisc ingress ffff:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DEV1 type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3-59-g8ed1b


From 509e56b37cc32c9b5fc2be585c25d1e60d6a1d73 Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Mon, 1 Jul 2019 14:39:01 -0700
Subject: blackhole_dev: add a selftest

Since this is not really a device with all capabilities, this test
ensures that it has *enough* to make it through the data path
without causing unwanted side-effects (read crash!).

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/Kconfig.debug                                 |   9 ++
 lib/Makefile                                      |   1 +
 lib/test_blackhole_dev.c                          | 100 ++++++++++++++++++++++
 tools/testing/selftests/net/Makefile              |   2 +-
 tools/testing/selftests/net/config                |   1 +
 tools/testing/selftests/net/test_blackhole_dev.sh |  11 +++
 6 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 lib/test_blackhole_dev.c
 create mode 100755 tools/testing/selftests/net/test_blackhole_dev.sh

(limited to 'tools')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cbdfae379896..99272b5dd980 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1909,6 +1909,15 @@ config TEST_BPF
 
 	  If unsure, say N.
 
+config TEST_BLACKHOLE_DEV
+	tristate "Test blackhole netdev functionality"
+	depends on m && NET
+	help
+	  This builds the "test_blackhole_dev" module that validates the
+	  data path through this blackhole netdev.
+
+	  If unsure, say N.
+
 config FIND_BIT_BENCHMARK
 	tristate "Test find_bit functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index dcb558c7554d..6ac44fe2a37f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -91,6 +91,7 @@ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
 obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
 obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o
 obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o
+obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
 
 obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
 
diff --git a/lib/test_blackhole_dev.c b/lib/test_blackhole_dev.c
new file mode 100644
index 000000000000..4c40580a99a3
--- /dev/null
+++ b/lib/test_blackhole_dev.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This module tests the blackhole_dev that is created during the
+ * net subsystem initialization. The test this module performs is
+ * by injecting an skb into the stack with skb->dev as the
+ * blackhole_dev and expects kernel to behave in a sane manner
+ * (in other words, *not crash*)!
+ *
+ * Copyright (c) 2018, Mahesh Bandewar <maheshb@google.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+#include <linux/ipv6.h>
+
+#include <net/dst.h>
+
+#define SKB_SIZE  256
+#define HEAD_SIZE (14+40+8)	/* Ether + IPv6 + UDP */
+#define TAIL_SIZE 32		/* random tail-room */
+
+#define UDP_PORT 1234
+
+static int __init test_blackholedev_init(void)
+{
+	struct ipv6hdr *ip6h;
+	struct sk_buff *skb;
+	struct ethhdr *ethh;
+	struct udphdr *uh;
+	int data_len;
+	int ret;
+
+	skb = alloc_skb(SKB_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Reserve head-room for the headers */
+	skb_reserve(skb, HEAD_SIZE);
+
+	/* Add data to the skb */
+	data_len = SKB_SIZE - (HEAD_SIZE + TAIL_SIZE);
+	memset(__skb_put(skb, data_len), 0xf, data_len);
+
+	/* Add protocol data */
+	/* (Transport) UDP */
+	uh = (struct udphdr *)skb_push(skb, sizeof(struct udphdr));
+	skb_set_transport_header(skb, 0);
+	uh->source = uh->dest = htons(UDP_PORT);
+	uh->len = htons(data_len);
+	uh->check = 0;
+	/* (Network) IPv6 */
+	ip6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));
+	skb_set_network_header(skb, 0);
+	ip6h->hop_limit = 32;
+	ip6h->payload_len = data_len + sizeof(struct udphdr);
+	ip6h->nexthdr = IPPROTO_UDP;
+	ip6h->saddr = in6addr_loopback;
+	ip6h->daddr = in6addr_loopback;
+	/* Ether */
+	ethh = (struct ethhdr *)skb_push(skb, sizeof(struct ethhdr));
+	skb_set_mac_header(skb, 0);
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = blackhole_netdev;
+
+	/* Now attempt to send the packet */
+	ret = dev_queue_xmit(skb);
+
+	switch (ret) {
+	case NET_XMIT_SUCCESS:
+		pr_warn("dev_queue_xmit() returned NET_XMIT_SUCCESS\n");
+		break;
+	case NET_XMIT_DROP:
+		pr_warn("dev_queue_xmit() returned NET_XMIT_DROP\n");
+		break;
+	case NET_XMIT_CN:
+		pr_warn("dev_queue_xmit() returned NET_XMIT_CN\n");
+		break;
+	default:
+		pr_err("dev_queue_xmit() returned UNKNOWN(%d)\n", ret);
+	}
+
+	return 0;
+}
+
+static void __exit test_blackholedev_exit(void)
+{
+	pr_warn("test_blackholedev module terminating.\n");
+}
+
+module_init(test_blackholedev_init);
+module_exit(test_blackholedev_exit);
+
+MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9a275d932fd5..1b24e36b4047 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
-	      rtnetlink.sh xfrm_policy.sh
+	      rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
 TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
 TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 89f84b5118bf..e4b878d95ba0 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -27,3 +27,4 @@ CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_ETF=m
+CONFIG_TEST_BLACKHOLE_DEV=m
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
new file mode 100755
index 000000000000..3119b80e711f
--- /dev/null
+++ b/tools/testing/selftests/net/test_blackhole_dev.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs blackhole-dev test using blackhole-dev kernel module
+
+if /sbin/modprobe -q test_blackhole_dev ; then
+	/sbin/modprobe -q -r test_blackhole_dev;
+	echo "test_blackhole_dev: ok";
+else
+	echo "test_blackhole_dev: [FAIL]";
+	exit 1;
+fi
-- 
cgit v1.2.3-59-g8ed1b


From ff95bf28c23490584b9d75913a520bb7bb1f2ecb Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Mon, 1 Jul 2019 12:40:31 +0800
Subject: selftests/net: skip psock_tpacket test if KALLSYMS was not enabled

The psock_tpacket test will need to access /proc/kallsyms, this would
require the kernel config CONFIG_KALLSYMS to be enabled first.

Apart from adding CONFIG_KALLSYMS to the net/config file here, check the
file existence to determine if we can run this test will be helpful to
avoid a false-positive test result when testing it directly with the
following commad against a kernel that have CONFIG_KALLSYMS disabled:
    make -C tools/testing/selftests TARGETS=net run_tests

Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/config            |  1 +
 tools/testing/selftests/net/run_afpackettests | 14 +++++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 474040448601..3dea2cba2325 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -25,3 +25,4 @@ CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_KALLSYMS=y
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index ea5938ec009a..8b42e8b04e0f 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -21,12 +21,16 @@ fi
 echo "--------------------"
 echo "running psock_tpacket test"
 echo "--------------------"
-./in_netns.sh ./psock_tpacket
-if [ $? -ne 0 ]; then
-	echo "[FAIL]"
-	ret=1
+if [ -f /proc/kallsyms ]; then
+	./in_netns.sh ./psock_tpacket
+	if [ $? -ne 0 ]; then
+		echo "[FAIL]"
+		ret=1
+	else
+		echo "[PASS]"
+	fi
 else
-	echo "[PASS]"
+	echo "[SKIP] CONFIG_KALLSYMS not enabled"
 fi
 
 echo "--------------------"
-- 
cgit v1.2.3-59-g8ed1b


From fd709721352dd5239056eacaded00f2244e6ef58 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 2 Jul 2019 12:46:00 +0200
Subject: xfrm: policy: fix bydst hlist corruption on hash rebuild

syzbot reported following spat:

BUG: KASAN: use-after-free in __write_once_size include/linux/compiler.h:221
BUG: KASAN: use-after-free in hlist_del_rcu include/linux/rculist.h:455
BUG: KASAN: use-after-free in xfrm_hash_rebuild+0xa0d/0x1000 net/xfrm/xfrm_policy.c:1318
Write of size 8 at addr ffff888095e79c00 by task kworker/1:3/8066
Workqueue: events xfrm_hash_rebuild
Call Trace:
 __write_once_size include/linux/compiler.h:221 [inline]
 hlist_del_rcu include/linux/rculist.h:455 [inline]
 xfrm_hash_rebuild+0xa0d/0x1000 net/xfrm/xfrm_policy.c:1318
 process_one_work+0x814/0x1130 kernel/workqueue.c:2269
Allocated by task 8064:
 __kmalloc+0x23c/0x310 mm/slab.c:3669
 kzalloc include/linux/slab.h:742 [inline]
 xfrm_hash_alloc+0x38/0xe0 net/xfrm/xfrm_hash.c:21
 xfrm_policy_init net/xfrm/xfrm_policy.c:4036 [inline]
 xfrm_net_init+0x269/0xd60 net/xfrm/xfrm_policy.c:4120
 ops_init+0x336/0x420 net/core/net_namespace.c:130
 setup_net+0x212/0x690 net/core/net_namespace.c:316

The faulting address is the address of the old chain head,
free'd by xfrm_hash_resize().

In xfrm_hash_rehash(), chain heads get re-initialized without
any hlist_del_rcu:

 for (i = hmask; i >= 0; i--)
    INIT_HLIST_HEAD(odst + i);

Then, hlist_del_rcu() gets called on the about to-be-reinserted policy
when iterating the per-net list of policies.

hlist_del_rcu() will then make chain->first be nonzero again:

static inline void __hlist_del(struct hlist_node *n)
{
   struct hlist_node *next = n->next;   // address of next element in list
   struct hlist_node **pprev = n->pprev;// location of previous elem, this
                                        // can point at chain->first
        WRITE_ONCE(*pprev, next);       // chain->first points to next elem
        if (next)
                next->pprev = pprev;

Then, when we walk chainlist to find insertion point, we may find a
non-empty list even though we're supposedly reinserting the first
policy to an empty chain.

To fix this first unlink all exact and inexact policies instead of
zeroing the list heads.

Add the commands equivalent to the syzbot reproducer to xfrm_policy.sh,
without fix KASAN catches the corruption as it happens, SLUB poisoning
detects it a bit later.

Reported-by: syzbot+0165480d4ef07360eeda@syzkaller.appspotmail.com
Fixes: 1548bc4e0512 ("xfrm: policy: delete inexact policies from inexact list on hash rebuild")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_policy.c                     | 12 +++++++-----
 tools/testing/selftests/net/xfrm_policy.sh | 27 ++++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7eefdc9be2a7..c411662141ae 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1276,13 +1276,17 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 
 		hlist_for_each_entry_safe(policy, n,
 					  &net->xfrm.policy_inexact[dir],
-					  bydst_inexact_list)
+					  bydst_inexact_list) {
+			hlist_del_rcu(&policy->bydst);
 			hlist_del_init(&policy->bydst_inexact_list);
+		}
 
 		hmask = net->xfrm.policy_bydst[dir].hmask;
 		odst = net->xfrm.policy_bydst[dir].table;
-		for (i = hmask; i >= 0; i--)
-			INIT_HLIST_HEAD(odst + i);
+		for (i = hmask; i >= 0; i--) {
+			hlist_for_each_entry_safe(policy, n, odst + i, bydst)
+				hlist_del_rcu(&policy->bydst);
+		}
 		if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
 			/* dir out => dst = remote, src = local */
 			net->xfrm.policy_bydst[dir].dbits4 = rbits4;
@@ -1311,8 +1315,6 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 		chain = policy_hash_bysel(net, &policy->selector,
 					  policy->family, dir);
 
-		hlist_del_rcu(&policy->bydst);
-
 		if (!chain) {
 			void *p = xfrm_policy_inexact_insert(policy, dir, 0);
 
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 71d7fdc513c1..5445943bf07f 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -257,6 +257,29 @@ check_exceptions()
 	return $lret
 }
 
+check_hthresh_repeat()
+{
+	local log=$1
+	i=0
+
+	for i in $(seq 1 10);do
+		ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+		ip -net ns1 xfrm policy set hthresh6 0 28 || break
+
+		ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+		ip -net ns1 xfrm policy set hthresh6 0 28 || break
+	done
+
+	if [ $i -ne 10 ] ;then
+		echo "FAIL: $log" 1>&2
+		ret=1
+		return 1
+	fi
+
+	echo "PASS: $log"
+	return 0
+}
+
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
@@ -404,7 +427,9 @@ for n in ns3 ns4;do
 	ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
 	sleep $((RANDOM%5))
 done
-check_exceptions "exceptions and block policies after hresh change to normal"
+check_exceptions "exceptions and block policies after htresh change to normal"
+
+check_hthresh_repeat "policies with repeated htresh change"
 
 for i in 1 2 3 4;do ip netns del ns$i;done
 
-- 
cgit v1.2.3-59-g8ed1b


From ac8786c72eba67dfc8ae751a75c586289a1b9b1b Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Fri, 28 Jun 2019 22:57:51 -0700
Subject: selftests: bpf: add tests for shifts by zero

There are currently no tests for ALU64 shift operations when the shift
amount is 0. This adds 6 new tests to make sure they are equivalent
to a no-op. The x32 JIT had such bugs that could have been caught by
these tests.

Cc: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/verifier/basic_instr.c | 85 ++++++++++++++++++++++
 1 file changed, 85 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/basic_instr.c b/tools/testing/selftests/bpf/verifier/basic_instr.c
index ed91a7b9a456..071dbc889e8c 100644
--- a/tools/testing/selftests/bpf/verifier/basic_instr.c
+++ b/tools/testing/selftests/bpf/verifier/basic_instr.c
@@ -90,6 +90,91 @@
 	},
 	.result = ACCEPT,
 },
+{
+	"lsh64 by 0 imm",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_0, 1),
+	BPF_LD_IMM64(BPF_REG_1, 1),
+	BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"rsh64 by 0 imm",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_0, 1),
+	BPF_LD_IMM64(BPF_REG_1, 0x100000000LL),
+	BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+	BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 0),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"arsh64 by 0 imm",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_0, 1),
+	BPF_LD_IMM64(BPF_REG_1, 0x100000000LL),
+	BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+	BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 0),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"lsh64 by 0 reg",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_0, 1),
+	BPF_LD_IMM64(BPF_REG_1, 1),
+	BPF_LD_IMM64(BPF_REG_2, 0),
+	BPF_ALU64_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"rsh64 by 0 reg",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_0, 1),
+	BPF_LD_IMM64(BPF_REG_1, 0x100000000LL),
+	BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+	BPF_LD_IMM64(BPF_REG_3, 0),
+	BPF_ALU64_REG(BPF_RSH, BPF_REG_1, BPF_REG_3),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"arsh64 by 0 reg",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_0, 1),
+	BPF_LD_IMM64(BPF_REG_1, 0x100000000LL),
+	BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+	BPF_LD_IMM64(BPF_REG_3, 0),
+	BPF_ALU64_REG(BPF_ARSH, BPF_REG_1, BPF_REG_3),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 1,
+},
 {
 	"invalid 64-bit BPF_END",
 	.insns = {
-- 
cgit v1.2.3-59-g8ed1b


From cdfc7f888c2a355b01308e97c6df108f1c2b64e8 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 2 Jul 2019 08:16:20 -0700
Subject: libbpf: fix GCC8 warning for strncpy

GCC8 started emitting warning about using strncpy with number of bytes
exactly equal destination size, which is generally unsafe, as can lead
to non-zero terminated string being copied. Use IFNAMSIZ - 1 as number
of bytes to ensure name is always zero-terminated.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/xsk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index bf15a80a37c2..b33740221b7e 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -327,7 +327,8 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
 
 	channels.cmd = ETHTOOL_GCHANNELS;
 	ifr.ifr_data = (void *)&channels;
-	strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ);
+	strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
 	err = ioctl(fd, SIOCETHTOOL, &ifr);
 	if (err && errno != EOPNOTSUPP) {
 		ret = -errno;
-- 
cgit v1.2.3-59-g8ed1b


From 33bae185f74d49a0d7b1bfaafb8e959efce0f243 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Tue, 2 Jul 2019 18:25:31 +0800
Subject: bpf, libbpf, smatch: Fix potential NULL pointer dereference

Based on the following report from Smatch, fix the potential NULL
pointer dereference check:

  tools/lib/bpf/libbpf.c:3493
  bpf_prog_load_xattr() warn: variable dereferenced before check 'attr'
  (see line 3483)

  3479 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
  3480                         struct bpf_object **pobj, int *prog_fd)
  3481 {
  3482         struct bpf_object_open_attr open_attr = {
  3483                 .file           = attr->file,
  3484                 .prog_type      = attr->prog_type,
                                         ^^^^^^
  3485         };

At the head of function, it directly access 'attr' without checking
if it's NULL pointer. This patch moves the values assignment after
validating 'attr' and 'attr->file'.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6e6ebef11ba3..4907997289e9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3872,10 +3872,7 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type,
 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 			struct bpf_object **pobj, int *prog_fd)
 {
-	struct bpf_object_open_attr open_attr = {
-		.file		= attr->file,
-		.prog_type	= attr->prog_type,
-	};
+	struct bpf_object_open_attr open_attr = {};
 	struct bpf_program *prog, *first_prog = NULL;
 	enum bpf_attach_type expected_attach_type;
 	enum bpf_prog_type prog_type;
@@ -3888,6 +3885,9 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	if (!attr->file)
 		return -EINVAL;
 
+	open_attr.file = attr->file;
+	open_attr.prog_type = attr->prog_type;
+
 	obj = bpf_object__open_xattr(&open_attr);
 	if (IS_ERR_OR_NULL(obj))
 		return -ENOENT;
-- 
cgit v1.2.3-59-g8ed1b


From d2f5bbbc350050895d9f33c2744a61f9e0af1caa Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Tue, 2 Jul 2019 20:26:51 +0200
Subject: selftests: bpf: standardize to static __always_inline

The progs for bpf selftests use several different notations to force
function inlining. Standardize to what most of them use,
static __always_inline.

Suggested-by: Song Liu <liu.song.a23@gmail.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/pyperf.h         |  9 +++---
 tools/testing/selftests/bpf/progs/strobemeta.h     | 36 ++++++++++++----------
 tools/testing/selftests/bpf/progs/test_jhash.h     |  3 +-
 tools/testing/selftests/bpf/progs/test_seg6_loop.c | 23 +++++++-------
 .../selftests/bpf/progs/test_verif_scale2.c        |  2 +-
 5 files changed, 38 insertions(+), 35 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 6b0781391be5..abf6224649be 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -75,8 +75,7 @@ typedef struct {
 	void* co_name; // PyCodeObject.co_name
 } FrameData;
 
-static inline __attribute__((__always_inline__)) void*
-get_thread_state(void* tls_base, PidData* pidData)
+static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
 {
 	void* thread_state;
 	int key;
@@ -87,8 +86,8 @@ get_thread_state(void* tls_base, PidData* pidData)
 	return thread_state;
 }
 
-static inline __attribute__((__always_inline__)) bool
-get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol)
+static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
+					   FrameData *frame, Symbol *symbol)
 {
 	// read data from PyFrameObject
 	bpf_probe_read(&frame->f_back,
@@ -161,7 +160,7 @@ struct bpf_elf_map SEC("maps") stackmap = {
 	.max_elem = 1000,
 };
 
-static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx)
+static __always_inline int __on_event(struct pt_regs *ctx)
 {
 	uint64_t pid_tgid = bpf_get_current_pid_tgid();
 	pid_t pid = (pid_t)(pid_tgid >> 32);
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 1ff73f60a3e4..553bc3b62e89 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -266,8 +266,8 @@ struct tls_index {
 	uint64_t offset;
 };
 
-static inline __attribute__((always_inline))
-void *calc_location(struct strobe_value_loc *loc, void *tls_base)
+static __always_inline void *calc_location(struct strobe_value_loc *loc,
+					   void *tls_base)
 {
 	/*
 	 * tls_mode value is:
@@ -327,10 +327,10 @@ void *calc_location(struct strobe_value_loc *loc, void *tls_base)
 		: NULL;
 }
 
-static inline __attribute__((always_inline))
-void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
-		  struct strobe_value_generic *value,
-		  struct strobemeta_payload *data)
+static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
+					 size_t idx, void *tls_base,
+					 struct strobe_value_generic *value,
+					 struct strobemeta_payload *data)
 {
 	void *location = calc_location(&cfg->int_locs[idx], tls_base);
 	if (!location)
@@ -342,10 +342,11 @@ void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
 		data->int_vals_set_mask |= (1 << idx);
 }
 
-static inline __attribute__((always_inline))
-uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base,
-		      struct strobe_value_generic *value,
-		      struct strobemeta_payload *data, void *payload)
+static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
+					     size_t idx, void *tls_base,
+					     struct strobe_value_generic *value,
+					     struct strobemeta_payload *data,
+					     void *payload)
 {
 	void *location;
 	uint32_t len;
@@ -371,10 +372,11 @@ uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base,
 	return len;
 }
 
-static inline __attribute__((always_inline))
-void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
-		   struct strobe_value_generic *value,
-		   struct strobemeta_payload* data, void *payload)
+static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
+					  size_t idx, void *tls_base,
+					  struct strobe_value_generic *value,
+					  struct strobemeta_payload *data,
+					  void *payload)
 {
 	struct strobe_map_descr* descr = &data->map_descrs[idx];
 	struct strobe_map_raw map;
@@ -435,9 +437,9 @@ void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base,
  * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
  * pointer to *right after* payload ends
  */
-static inline __attribute__((always_inline))
-void *read_strobe_meta(struct task_struct* task,
-		       struct strobemeta_payload* data) {
+static __always_inline void *read_strobe_meta(struct task_struct *task,
+					      struct strobemeta_payload *data)
+{
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
 	struct strobe_value_generic value = {0};
 	struct strobemeta_cfg *cfg;
diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h
index 3d12c11a8d47..c300734d26f6 100644
--- a/tools/testing/selftests/bpf/progs/test_jhash.h
+++ b/tools/testing/selftests/bpf/progs/test_jhash.h
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
+#include <features.h>
 
 typedef unsigned int u32;
 
-static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift)
+static __always_inline u32 rol32(u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
 }
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index 463964d79f73..1dbe1d4d467e 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -54,7 +54,7 @@ struct sr6_tlv_t {
 	unsigned char value[0];
 } BPF_PACKET_HEADER;
 
-static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
 {
 	void *cursor, *data_end;
 	struct ip6_srh_t *srh;
@@ -88,9 +88,9 @@ static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff
 	return srh;
 }
 
-static __attribute__((always_inline))
-int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
-		   uint32_t old_pad, uint32_t pad_off)
+static __always_inline int update_tlv_pad(struct __sk_buff *skb,
+					  uint32_t new_pad, uint32_t old_pad,
+					  uint32_t pad_off)
 {
 	int err;
 
@@ -118,10 +118,11 @@ int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
 	return 0;
 }
 
-static __attribute__((always_inline))
-int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
-			  uint32_t *tlv_off, uint32_t *pad_size,
-			  uint32_t *pad_off)
+static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
+						 struct ip6_srh_t *srh,
+						 uint32_t *tlv_off,
+						 uint32_t *pad_size,
+						 uint32_t *pad_off)
 {
 	uint32_t srh_off, cur_off;
 	int offset_valid = 0;
@@ -177,9 +178,9 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
 	return 0;
 }
 
-static __attribute__((always_inline))
-int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
-	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
+static __always_inline int add_tlv(struct __sk_buff *skb,
+				   struct ip6_srh_t *srh, uint32_t tlv_off,
+				   struct sr6_tlv_t *itlv, uint8_t tlv_size)
 {
 	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
 	uint8_t len_remaining, new_pad;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index 77830693eccb..9897150ed516 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -2,7 +2,7 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
-#define ATTR __attribute__((always_inline))
+#define ATTR __always_inline
 #include "test_jhash.h"
 
 SEC("scale90_inline")
-- 
cgit v1.2.3-59-g8ed1b


From 11aca65ec4db09527d3e9b6b41a0615b7da4386b Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Tue, 2 Jul 2019 19:40:31 +0200
Subject: selftests: bpf: fix inlines in test_lwt_seg6local

Selftests are reporting this failure in test_lwt_seg6local.sh:

+ ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
Error fetching program/map!
Failed to parse eBPF program: Operation not permitted

The problem is __attribute__((always_inline)) alone is not enough to prevent
clang from inserting those functions in .text. In that case, .text is not
marked as relocateable.

See the output of objdump -h test_lwt_seg6local.o:

Idx Name          Size      VMA               LMA               File off  Algn
  0 .text         00003530  0000000000000000  0000000000000000  00000040  2**3
                  CONTENTS, ALLOC, LOAD, READONLY, CODE

This causes the iproute bpf loader to fail in bpf_fetch_prog_sec:
bpf_has_call_data returns true but bpf_fetch_prog_relo fails as there's no
relocateable .text section in the file.

To fix this, convert to 'static __always_inline'.

v2: Use 'static __always_inline' instead of 'static inline
    __attribute__((always_inline))'

Fixes: c99a84eac026 ("selftests/bpf: test for seg6local End.BPF action")
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/test_lwt_seg6local.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index 0575751bc1bc..e2f6ed0a583d 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -61,7 +61,7 @@ struct sr6_tlv_t {
 	unsigned char value[0];
 } BPF_PACKET_HEADER;
 
-__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
 {
 	void *cursor, *data_end;
 	struct ip6_srh_t *srh;
@@ -95,7 +95,7 @@ __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
 	return srh;
 }
 
-__attribute__((always_inline))
+static __always_inline
 int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
 		   uint32_t old_pad, uint32_t pad_off)
 {
@@ -125,7 +125,7 @@ int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
 	return 0;
 }
 
-__attribute__((always_inline))
+static __always_inline
 int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
 			  uint32_t *tlv_off, uint32_t *pad_size,
 			  uint32_t *pad_off)
@@ -184,7 +184,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
 	return 0;
 }
 
-__attribute__((always_inline))
+static __always_inline
 int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
 	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
 {
@@ -228,7 +228,7 @@ int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
 	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
 }
 
-__attribute__((always_inline))
+static __always_inline
 int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
 	       uint32_t tlv_off)
 {
@@ -266,7 +266,7 @@ int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
 	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
 }
 
-__attribute__((always_inline))
+static __always_inline
 int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
 {
 	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
-- 
cgit v1.2.3-59-g8ed1b


From 692cbaa99fe446ed30bfb7e53fb2b4cc518331a1 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 2 Jul 2019 09:14:00 -0700
Subject: bpf/tools: sync bpf.h

Sync new bpf_tcp_sock fields and new BPF_PROG_TYPE_SOCK_OPS RTT callback.

Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a396b516a2b2..cecf42c871d4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1767,6 +1767,7 @@ union bpf_attr {
  * 		* **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
  * 		* **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
  * 		* **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * 		* **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
  *
  * 		Therefore, this function can be used to clear a callback flag by
  * 		setting the appropriate bit to zero. e.g. to disable the RTO
@@ -3069,6 +3070,12 @@ struct bpf_tcp_sock {
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
 				 */
+	__u32 dsack_dups;	/* RFC4898 tcpEStatsStackDSACKDups
+				 * total number of DSACK blocks received
+				 */
+	__u32 delivered;	/* Total data packets delivered incl. rexmits */
+	__u32 delivered_ce;	/* Like the above but only ECE marked packets */
+	__u32 icsk_retransmits;	/* Number of unrecovered [RTO] timeouts */
 };
 
 struct bpf_sock_tuple {
@@ -3311,7 +3318,8 @@ struct bpf_sock_ops {
 #define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 #define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
 #define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
+#define BPF_SOCK_OPS_RTT_CB_FLAG	(1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0xF		/* Mask of all currently
 							 * supported cb flags
 							 */
 
@@ -3366,6 +3374,8 @@ enum {
 	BPF_SOCK_OPS_TCP_LISTEN_CB,	/* Called on listen(2), right after
 					 * socket transition to LISTEN state.
 					 */
+	BPF_SOCK_OPS_RTT_CB,		/* Called on every RTT.
+					 */
 };
 
 /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
-- 
cgit v1.2.3-59-g8ed1b


From b55873984dab0eda6db00d19db4c43dce5c11d98 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 2 Jul 2019 09:14:01 -0700
Subject: selftests/bpf: test BPF_SOCK_OPS_RTT_CB

Make sure the callback is invoked for syn-ack and data packet.

Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/Makefile        |   3 +-
 tools/testing/selftests/bpf/progs/tcp_rtt.c |  61 +++++++
 tools/testing/selftests/bpf/test_tcp_rtt.c  | 254 ++++++++++++++++++++++++++++
 3 files changed, 317 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/tcp_rtt.c
 create mode 100644 tools/testing/selftests/bpf/test_tcp_rtt.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index de1754a8f5fe..2620406a53ec 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -27,7 +27,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_cgroup_storage test_select_reuseport test_section_names \
 	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
 	test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \
-	test_sockopt_multi
+	test_sockopt_multi test_tcp_rtt
 
 BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
 TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -107,6 +107,7 @@ $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
 $(OUTPUT)/test_sockopt: cgroup_helpers.c
 $(OUTPUT)/test_sockopt_sk: cgroup_helpers.c
 $(OUTPUT)/test_sockopt_multi: cgroup_helpers.c
+$(OUTPUT)/test_tcp_rtt: cgroup_helpers.c
 
 .PHONY: force
 
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
new file mode 100644
index 000000000000..233bdcb1659e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
+
+struct tcp_rtt_storage {
+	__u32 invoked;
+	__u32 dsack_dups;
+	__u32 delivered;
+	__u32 delivered_ce;
+	__u32 icsk_retransmits;
+};
+
+struct bpf_map_def SEC("maps") socket_storage_map = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct tcp_rtt_storage),
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage);
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+	struct tcp_rtt_storage *storage;
+	struct bpf_tcp_sock *tcp_sk;
+	int op = (int) ctx->op;
+	struct bpf_sock *sk;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 1;
+
+	storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 1;
+
+	if (op == BPF_SOCK_OPS_TCP_CONNECT_CB) {
+		bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+		return 1;
+	}
+
+	if (op != BPF_SOCK_OPS_RTT_CB)
+		return 1;
+
+	tcp_sk = bpf_tcp_sock(sk);
+	if (!tcp_sk)
+		return 1;
+
+	storage->invoked++;
+
+	storage->dsack_dups = tcp_sk->dsack_dups;
+	storage->delivered = tcp_sk->delivered;
+	storage->delivered_ce = tcp_sk->delivered_ce;
+	storage->icsk_retransmits = tcp_sk->icsk_retransmits;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/test_tcp_rtt.c
new file mode 100644
index 000000000000..90c3862f74a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_rtt.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <pthread.h>
+
+#include <linux/filter.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH                                "/tcp_rtt"
+
+struct tcp_rtt_storage {
+	__u32 invoked;
+	__u32 dsack_dups;
+	__u32 delivered;
+	__u32 delivered_ce;
+	__u32 icsk_retransmits;
+};
+
+static void send_byte(int fd)
+{
+	char b = 0x55;
+
+	if (write(fd, &b, sizeof(b)) != 1)
+		error(1, errno, "Failed to send single byte");
+}
+
+static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
+		     __u32 dsack_dups, __u32 delivered, __u32 delivered_ce,
+		     __u32 icsk_retransmits)
+{
+	int err = 0;
+	struct tcp_rtt_storage val;
+
+	if (bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)
+		error(1, errno, "Failed to read socket storage");
+
+	if (val.invoked != invoked) {
+		log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d",
+			msg, val.invoked, invoked);
+		err++;
+	}
+
+	if (val.dsack_dups != dsack_dups) {
+		log_err("%s: unexpected bpf_tcp_sock.dsack_dups %d != %d",
+			msg, val.dsack_dups, dsack_dups);
+		err++;
+	}
+
+	if (val.delivered != delivered) {
+		log_err("%s: unexpected bpf_tcp_sock.delivered %d != %d",
+			msg, val.delivered, delivered);
+		err++;
+	}
+
+	if (val.delivered_ce != delivered_ce) {
+		log_err("%s: unexpected bpf_tcp_sock.delivered_ce %d != %d",
+			msg, val.delivered_ce, delivered_ce);
+		err++;
+	}
+
+	if (val.icsk_retransmits != icsk_retransmits) {
+		log_err("%s: unexpected bpf_tcp_sock.icsk_retransmits %d != %d",
+			msg, val.icsk_retransmits, icsk_retransmits);
+		err++;
+	}
+
+	return err;
+}
+
+static int connect_to_server(int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
+		return -1;
+	}
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		goto out;
+	}
+
+	if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+		log_err("Fail to connect to server");
+		goto out;
+	}
+
+	return fd;
+
+out:
+	close(fd);
+	return -1;
+}
+
+static int run_test(int cgroup_fd, int server_fd)
+{
+	struct bpf_prog_load_attr attr = {
+		.prog_type = BPF_PROG_TYPE_SOCK_OPS,
+		.file = "./tcp_rtt.o",
+		.expected_attach_type = BPF_CGROUP_SOCK_OPS,
+	};
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	int client_fd;
+	int prog_fd;
+	int map_fd;
+	int err;
+
+	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	map = bpf_map__next(NULL, obj);
+	map_fd = bpf_map__fd(map);
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	client_fd = connect_to_server(server_fd);
+	if (client_fd < 0) {
+		err = -1;
+		goto close_bpf_object;
+	}
+
+	err += verify_sk(map_fd, client_fd, "syn-ack",
+			 /*invoked=*/1,
+			 /*dsack_dups=*/0,
+			 /*delivered=*/1,
+			 /*delivered_ce=*/0,
+			 /*icsk_retransmits=*/0);
+
+	send_byte(client_fd);
+
+	err += verify_sk(map_fd, client_fd, "first payload byte",
+			 /*invoked=*/2,
+			 /*dsack_dups=*/0,
+			 /*delivered=*/2,
+			 /*delivered_ce=*/0,
+			 /*icsk_retransmits=*/0);
+
+	close(client_fd);
+
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+static int start_server(void)
+{
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+	};
+	int fd;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create server socket");
+		return -1;
+	}
+
+	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
+		log_err("Failed to bind socket");
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+static void *server_thread(void *arg)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd = *(int *)arg;
+	int client_fd;
+
+	if (listen(fd, 1) < 0)
+		error(1, errno, "Failed to listed on socket");
+
+	client_fd = accept(fd, (struct sockaddr *)&addr, &len);
+	if (client_fd < 0)
+		error(1, errno, "Failed to accept client");
+
+	/* Wait for the next connection (that never arrives)
+	 * to keep this thread alive to prevent calling
+	 * close() on client_fd.
+	 */
+	if (accept(fd, (struct sockaddr *)&addr, &len) >= 0)
+		error(1, errno, "Unexpected success in second accept");
+
+	close(client_fd);
+
+	return NULL;
+}
+
+int main(int args, char **argv)
+{
+	int server_fd, cgroup_fd;
+	int err = EXIT_SUCCESS;
+	pthread_t tid;
+
+	if (setup_cgroup_environment())
+		goto cleanup_obj;
+
+	cgroup_fd = create_and_get_cgroup(CG_PATH);
+	if (cgroup_fd < 0)
+		goto cleanup_cgroup_env;
+
+	if (join_cgroup(CG_PATH))
+		goto cleanup_cgroup;
+
+	server_fd = start_server();
+	if (server_fd < 0) {
+		err = EXIT_FAILURE;
+		goto cleanup_cgroup;
+	}
+
+	pthread_create(&tid, NULL, server_thread, (void *)&server_fd);
+
+	if (run_test(cgroup_fd, server_fd))
+		err = EXIT_FAILURE;
+
+	close(server_fd);
+
+	printf("test_sockopt_sk: %s\n",
+	       err == EXIT_SUCCESS ? "PASSED" : "FAILED");
+
+cleanup_cgroup:
+	close(cgroup_fd);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+cleanup_obj:
+	return err;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 07a4ddec3ce9b0a533b5f90f582f1057390d5e63 Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.ch>
Date: Tue, 2 Jul 2019 19:43:54 +0200
Subject: bonding: add an option to specify a delay between peer notifications

Currently, gratuitous ARP/ND packets are sent every `miimon'
milliseconds. This commit allows a user to specify a custom delay
through a new option, `peer_notif_delay'.

Like for `updelay' and `downdelay', this delay should be a multiple of
`miimon' to avoid managing an additional work queue. The configuration
logic is copied from `updelay' and `downdelay'. However, the default
value cannot be set using a module parameter: Netlink or sysfs should
be used to configure this feature.

When setting `miimon' to 100 and `peer_notif_delay' to 500, we can
observe the 500 ms delay is respected:

    20:30:19.354693 ARP, Request who-has 203.0.113.10 tell 203.0.113.10, length 28
    20:30:19.874892 ARP, Request who-has 203.0.113.10 tell 203.0.113.10, length 28
    20:30:20.394919 ARP, Request who-has 203.0.113.10 tell 203.0.113.10, length 28
    20:30:20.914963 ARP, Request who-has 203.0.113.10 tell 203.0.113.10, length 28

In bond_mii_monitor(), I have tried to keep the lock logic readable.
The change is due to the fact we cannot rely on a notification to
lower the value of `bond->send_peer_notif' as `NETDEV_NOTIFY_PEERS' is
only triggered once every N times, while we need to decrement the
counter each time.

iproute2 also needs to be updated to be able to specify this new
attribute through `ip link'.

Signed-off-by: Vincent Bernat <vincent@bernat.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c    | 31 +++++++++++------
 drivers/net/bonding/bond_netlink.c | 14 ++++++++
 drivers/net/bonding/bond_options.c | 71 ++++++++++++++++++++++++--------------
 drivers/net/bonding/bond_procfs.c  |  2 ++
 drivers/net/bonding/bond_sysfs.c   | 13 +++++++
 include/net/bond_options.h         |  1 +
 include/net/bonding.h              |  1 +
 include/uapi/linux/if_link.h       |  1 +
 tools/include/uapi/linux/if_link.h |  1 +
 9 files changed, 98 insertions(+), 37 deletions(-)

(limited to 'tools')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 84168455aded..302499ae05e6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -796,6 +796,8 @@ static bool bond_should_notify_peers(struct bonding *bond)
 		   slave ? slave->dev->name : "NULL");
 
 	if (!slave || !bond->send_peer_notif ||
+	    bond->send_peer_notif %
+	    max(1, bond->params.peer_notif_delay) != 0 ||
 	    !netif_carrier_ok(bond->dev) ||
 	    test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
 		return false;
@@ -886,15 +888,18 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 
 			if (netif_running(bond->dev)) {
 				bond->send_peer_notif =
-					bond->params.num_peer_notif;
+					bond->params.num_peer_notif *
+					max(1, bond->params.peer_notif_delay);
 				should_notify_peers =
 					bond_should_notify_peers(bond);
 			}
 
 			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
-			if (should_notify_peers)
+			if (should_notify_peers) {
+				bond->send_peer_notif--;
 				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
 							 bond->dev);
+			}
 		}
 	}
 
@@ -2279,6 +2284,7 @@ static void bond_mii_monitor(struct work_struct *work)
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
 	bool should_notify_peers = false;
+	bool commit;
 	unsigned long delay;
 	struct slave *slave;
 	struct list_head *iter;
@@ -2289,12 +2295,19 @@ static void bond_mii_monitor(struct work_struct *work)
 		goto re_arm;
 
 	rcu_read_lock();
-
 	should_notify_peers = bond_should_notify_peers(bond);
-
-	if (bond_miimon_inspect(bond)) {
+	commit = !!bond_miimon_inspect(bond);
+	if (bond->send_peer_notif) {
+		rcu_read_unlock();
+		if (rtnl_trylock()) {
+			bond->send_peer_notif--;
+			rtnl_unlock();
+		}
+	} else {
 		rcu_read_unlock();
+	}
 
+	if (commit) {
 		/* Race avoidance with bond_close cancel of workqueue */
 		if (!rtnl_trylock()) {
 			delay = 1;
@@ -2308,8 +2321,7 @@ static void bond_mii_monitor(struct work_struct *work)
 		bond_miimon_commit(bond);
 
 		rtnl_unlock();	/* might sleep, hold no other locks */
-	} else
-		rcu_read_unlock();
+	}
 
 re_arm:
 	if (bond->params.miimon)
@@ -3065,10 +3077,6 @@ static int bond_master_netdev_event(unsigned long event,
 	case NETDEV_REGISTER:
 		bond_create_proc_entry(event_bond);
 		break;
-	case NETDEV_NOTIFY_PEERS:
-		if (event_bond->send_peer_notif)
-			event_bond->send_peer_notif--;
-		break;
 	default:
 		break;
 	}
@@ -4691,6 +4699,7 @@ static int bond_check_params(struct bond_params *params)
 	params->arp_all_targets = arp_all_targets_value;
 	params->updelay = updelay;
 	params->downdelay = downdelay;
+	params->peer_notif_delay = 0;
 	params->use_carrier = use_carrier;
 	params->lacp_fast = lacp_fast;
 	params->primary[0] = 0;
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index b24cce48ae35..a259860a7208 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -108,6 +108,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
 	[IFLA_BOND_AD_ACTOR_SYSTEM]	= { .type = NLA_BINARY,
 					    .len  = ETH_ALEN },
 	[IFLA_BOND_TLB_DYNAMIC_LB]	= { .type = NLA_U8 },
+	[IFLA_BOND_PEER_NOTIF_DELAY]    = { .type = NLA_U32 },
 };
 
 static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
@@ -215,6 +216,14 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
 		if (err)
 			return err;
 	}
+	if (data[IFLA_BOND_PEER_NOTIF_DELAY]) {
+		int delay = nla_get_u32(data[IFLA_BOND_PEER_NOTIF_DELAY]);
+
+		bond_opt_initval(&newval, delay);
+		err = __bond_opt_set(bond, BOND_OPT_PEER_NOTIF_DELAY, &newval);
+		if (err)
+			return err;
+	}
 	if (data[IFLA_BOND_USE_CARRIER]) {
 		int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]);
 
@@ -494,6 +503,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
 		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */
 		nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */
 		nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */
+		nla_total_size(sizeof(u32)) +	/* IFLA_BOND_PEER_NOTIF_DELAY */
 		0;
 }
 
@@ -536,6 +546,10 @@ static int bond_fill_info(struct sk_buff *skb,
 			bond->params.downdelay * bond->params.miimon))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, IFLA_BOND_PEER_NOTIF_DELAY,
+			bond->params.downdelay * bond->params.miimon))
+		goto nla_put_failure;
+
 	if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier))
 		goto nla_put_failure;
 
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 0d852fe9da7c..ddb3916d3506 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -24,6 +24,8 @@ static int bond_option_updelay_set(struct bonding *bond,
 				   const struct bond_opt_value *newval);
 static int bond_option_downdelay_set(struct bonding *bond,
 				     const struct bond_opt_value *newval);
+static int bond_option_peer_notif_delay_set(struct bonding *bond,
+					    const struct bond_opt_value *newval);
 static int bond_option_use_carrier_set(struct bonding *bond,
 				       const struct bond_opt_value *newval);
 static int bond_option_arp_interval_set(struct bonding *bond,
@@ -424,6 +426,13 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
 		.desc = "Number of peer notifications to send on failover event",
 		.values = bond_num_peer_notif_tbl,
 		.set = bond_option_num_peer_notif_set
+	},
+	[BOND_OPT_PEER_NOTIF_DELAY] = {
+		.id = BOND_OPT_PEER_NOTIF_DELAY,
+		.name = "peer_notif_delay",
+		.desc = "Delay between each peer notification on failover event, in milliseconds",
+		.values = bond_intmax_tbl,
+		.set = bond_option_peer_notif_delay_set
 	}
 };
 
@@ -841,6 +850,9 @@ static int bond_option_miimon_set(struct bonding *bond,
 	if (bond->params.downdelay)
 		netdev_dbg(bond->dev, "Note: Updating downdelay (to %d) since it is a multiple of the miimon value\n",
 			   bond->params.downdelay * bond->params.miimon);
+	if (bond->params.peer_notif_delay)
+		netdev_dbg(bond->dev, "Note: Updating peer_notif_delay (to %d) since it is a multiple of the miimon value\n",
+			   bond->params.peer_notif_delay * bond->params.miimon);
 	if (newval->value && bond->params.arp_interval) {
 		netdev_dbg(bond->dev, "MII monitoring cannot be used with ARP monitoring - disabling ARP monitoring...\n");
 		bond->params.arp_interval = 0;
@@ -864,52 +876,59 @@ static int bond_option_miimon_set(struct bonding *bond,
 	return 0;
 }
 
-/* Set up and down delays. These must be multiples of the
- * MII monitoring value, and are stored internally as the multiplier.
- * Thus, we must translate to MS for the real world.
+/* Set up, down and peer notification delays. These must be multiples
+ * of the MII monitoring value, and are stored internally as the
+ * multiplier. Thus, we must translate to MS for the real world.
  */
-static int bond_option_updelay_set(struct bonding *bond,
-				   const struct bond_opt_value *newval)
+static int _bond_option_delay_set(struct bonding *bond,
+				  const struct bond_opt_value *newval,
+				  const char *name,
+				  int *target)
 {
 	int value = newval->value;
 
 	if (!bond->params.miimon) {
-		netdev_err(bond->dev, "Unable to set up delay as MII monitoring is disabled\n");
+		netdev_err(bond->dev, "Unable to set %s as MII monitoring is disabled\n",
+			   name);
 		return -EPERM;
 	}
 	if ((value % bond->params.miimon) != 0) {
-		netdev_warn(bond->dev, "up delay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n",
+		netdev_warn(bond->dev,
+			    "%s (%d) is not a multiple of miimon (%d), value rounded to %d ms\n",
+			    name,
 			    value, bond->params.miimon,
 			    (value / bond->params.miimon) *
 			    bond->params.miimon);
 	}
-	bond->params.updelay = value / bond->params.miimon;
-	netdev_dbg(bond->dev, "Setting up delay to %d\n",
-		   bond->params.updelay * bond->params.miimon);
+	*target = value / bond->params.miimon;
+	netdev_dbg(bond->dev, "Setting %s to %d\n",
+		   name,
+		   *target * bond->params.miimon);
 
 	return 0;
 }
 
+static int bond_option_updelay_set(struct bonding *bond,
+				   const struct bond_opt_value *newval)
+{
+	return _bond_option_delay_set(bond, newval, "up delay",
+				      &bond->params.updelay);
+}
+
 static int bond_option_downdelay_set(struct bonding *bond,
 				     const struct bond_opt_value *newval)
 {
-	int value = newval->value;
-
-	if (!bond->params.miimon) {
-		netdev_err(bond->dev, "Unable to set down delay as MII monitoring is disabled\n");
-		return -EPERM;
-	}
-	if ((value % bond->params.miimon) != 0) {
-		netdev_warn(bond->dev, "down delay (%d) is not a multiple of miimon (%d), delay rounded to %d ms\n",
-			    value, bond->params.miimon,
-			    (value / bond->params.miimon) *
-			    bond->params.miimon);
-	}
-	bond->params.downdelay = value / bond->params.miimon;
-	netdev_dbg(bond->dev, "Setting down delay to %d\n",
-		   bond->params.downdelay * bond->params.miimon);
+	return _bond_option_delay_set(bond, newval, "down delay",
+				      &bond->params.downdelay);
+}
 
-	return 0;
+static int bond_option_peer_notif_delay_set(struct bonding *bond,
+					    const struct bond_opt_value *newval)
+{
+	int ret = _bond_option_delay_set(bond, newval,
+					 "peer notification delay",
+					 &bond->params.peer_notif_delay);
+	return ret;
 }
 
 static int bond_option_use_carrier_set(struct bonding *bond,
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 9f7d83e827c3..fd5c9cbe45b1 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -104,6 +104,8 @@ static void bond_info_show_master(struct seq_file *seq)
 		   bond->params.updelay * bond->params.miimon);
 	seq_printf(seq, "Down Delay (ms): %d\n",
 		   bond->params.downdelay * bond->params.miimon);
+	seq_printf(seq, "Peer Notification Delay (ms): %d\n",
+		   bond->params.peer_notif_delay * bond->params.miimon);
 
 
 	/* ARP information */
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index 94214eaf53c5..2d615a93685e 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -327,6 +327,18 @@ static ssize_t bonding_show_updelay(struct device *d,
 static DEVICE_ATTR(updelay, 0644,
 		   bonding_show_updelay, bonding_sysfs_store_option);
 
+static ssize_t bonding_show_peer_notif_delay(struct device *d,
+					     struct device_attribute *attr,
+					     char *buf)
+{
+	struct bonding *bond = to_bond(d);
+
+	return sprintf(buf, "%d\n",
+		       bond->params.peer_notif_delay * bond->params.miimon);
+}
+static DEVICE_ATTR(peer_notif_delay, 0644,
+		   bonding_show_peer_notif_delay, bonding_sysfs_store_option);
+
 /* Show the LACP interval. */
 static ssize_t bonding_show_lacp(struct device *d,
 				 struct device_attribute *attr,
@@ -718,6 +730,7 @@ static struct attribute *per_bond_attrs[] = {
 	&dev_attr_arp_ip_target.attr,
 	&dev_attr_downdelay.attr,
 	&dev_attr_updelay.attr,
+	&dev_attr_peer_notif_delay.attr,
 	&dev_attr_lacp_rate.attr,
 	&dev_attr_ad_select.attr,
 	&dev_attr_xmit_hash_policy.attr,
diff --git a/include/net/bond_options.h b/include/net/bond_options.h
index 2a05cc349018..9d382f2f0bc5 100644
--- a/include/net/bond_options.h
+++ b/include/net/bond_options.h
@@ -63,6 +63,7 @@ enum {
 	BOND_OPT_AD_ACTOR_SYSTEM,
 	BOND_OPT_AD_USER_PORT_KEY,
 	BOND_OPT_NUM_PEER_NOTIF_ALIAS,
+	BOND_OPT_PEER_NOTIF_DELAY,
 	BOND_OPT_LAST
 };
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 676e7fae05a3..f7fe45689142 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -123,6 +123,7 @@ struct bond_params {
 	int fail_over_mac;
 	int updelay;
 	int downdelay;
+	int peer_notif_delay;
 	int lacp_fast;
 	unsigned int min_links;
 	int ad_select;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6f75bda2c2d7..4a8c02cafa9a 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -636,6 +636,7 @@ enum {
 	IFLA_BOND_AD_USER_PORT_KEY,
 	IFLA_BOND_AD_ACTOR_SYSTEM,
 	IFLA_BOND_TLB_DYNAMIC_LB,
+	IFLA_BOND_PEER_NOTIF_DELAY,
 	__IFLA_BOND_MAX,
 };
 
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 5b225ff63b48..7d113a9602f0 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -636,6 +636,7 @@ enum {
 	IFLA_BOND_AD_USER_PORT_KEY,
 	IFLA_BOND_AD_ACTOR_SYSTEM,
 	IFLA_BOND_TLB_DYNAMIC_LB,
+	IFLA_BOND_PEER_NOTIF_DELAY,
 	__IFLA_BOND_MAX,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From d66f43666a6811bcc3e529d5a088e1bdde94ca1b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:58:55 -0700
Subject: libbpf: make libbpf_strerror_r agnostic to sign of error

It's often inconvenient to switch sign of error when passing it into
libbpf_strerror_r. It's better for it to handle that automatically.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/str_error.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
index 00e48ac5b806..b8064eedc177 100644
--- a/tools/lib/bpf/str_error.c
+++ b/tools/lib/bpf/str_error.c
@@ -11,7 +11,7 @@
  */
 char *libbpf_strerror_r(int err, char *dst, int len)
 {
-	int ret = strerror_r(err, dst, len);
+	int ret = strerror_r(err < 0 ? -err : err, dst, len);
 	if (ret)
 		snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
 	return dst;
-- 
cgit v1.2.3-59-g8ed1b


From 1c2e9efc26f389effc86e9b4d41f85d101f17629 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:58:56 -0700
Subject: libbpf: introduce concept of bpf_link

bpf_link is an abstraction of an association of a BPF program and one of
many possible BPF attachment points (hooks). This allows to have uniform
interface for detaching BPF programs regardless of the nature of link
and how it was created. Details of creation and setting up of a specific
bpf_link is handled by corresponding attachment methods
(bpf_program__attach_xxx) added in subsequent commits. Once successfully
created, bpf_link has to be eventually destroyed with
bpf_link__destroy(), at which point BPF program is disassociated from
a hook and all the relevant resources are freed.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c   | 17 +++++++++++++++++
 tools/lib/bpf/libbpf.h   |  4 ++++
 tools/lib/bpf/libbpf.map |  3 ++-
 3 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 4907997289e9..f308690abd50 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3941,6 +3941,23 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	return 0;
 }
 
+struct bpf_link {
+	int (*destroy)(struct bpf_link *link);
+};
+
+int bpf_link__destroy(struct bpf_link *link)
+{
+	int err;
+
+	if (!link)
+		return 0;
+
+	err = link->destroy(link);
+	free(link);
+
+	return err;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index d639f47e3110..5082a5ebb0c2 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -165,6 +165,10 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
 LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
 LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
 
+struct bpf_link;
+
+LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
+
 struct bpf_insn;
 
 /*
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 2c6d835620d2..3cde850fc8da 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -167,10 +167,11 @@ LIBBPF_0.0.3 {
 
 LIBBPF_0.0.4 {
 	global:
+		bpf_link__destroy;
+		bpf_object__load_xattr;
 		btf_dump__dump_type;
 		btf_dump__free;
 		btf_dump__new;
 		btf__parse_elf;
-		bpf_object__load_xattr;
 		libbpf_num_possible_cpus;
 } LIBBPF_0.0.3;
-- 
cgit v1.2.3-59-g8ed1b


From 63f2f5ee856ba9db751123bc5519be099a3849a7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:58:57 -0700
Subject: libbpf: add ability to attach/detach BPF program to perf event

bpf_program__attach_perf_event allows to attach BPF program to existing
perf event hook, providing most generic and most low-level way to attach BPF
programs. It returns struct bpf_link, which should be passed to
bpf_link__destroy to detach and free resources, associated with a link.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c   | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  3 +++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 67 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f308690abd50..54ce22f9f777 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -32,6 +32,7 @@
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
+#include <sys/ioctl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -3958,6 +3959,68 @@ int bpf_link__destroy(struct bpf_link *link)
 	return err;
 }
 
+struct bpf_link_fd {
+	struct bpf_link link; /* has to be at the top of struct */
+	int fd; /* hook FD */
+};
+
+static int bpf_link__destroy_perf_event(struct bpf_link *link)
+{
+	struct bpf_link_fd *l = (void *)link;
+	int err;
+
+	err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
+	if (err)
+		err = -errno;
+
+	close(l->fd);
+	return err;
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
+						int pfd)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *link;
+	int prog_fd, err;
+
+	if (pfd < 0) {
+		pr_warning("program '%s': invalid perf event FD %d\n",
+			   bpf_program__title(prog, false), pfd);
+		return ERR_PTR(-EINVAL);
+	}
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
+			   bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = malloc(sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->link.destroy = &bpf_link__destroy_perf_event;
+	link->fd = pfd;
+
+	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+		err = -errno;
+		free(link);
+		pr_warning("program '%s': failed to attach to pfd %d: %s\n",
+			   bpf_program__title(prog, false), pfd,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return ERR_PTR(err);
+	}
+	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		free(link);
+		pr_warning("program '%s': failed to enable pfd %d: %s\n",
+			   bpf_program__title(prog, false), pfd,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return ERR_PTR(err);
+	}
+	return (struct bpf_link *)link;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5082a5ebb0c2..1bf66c4a9330 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -169,6 +169,9 @@ struct bpf_link;
 
 LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+
 struct bpf_insn;
 
 /*
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 3cde850fc8da..756f5aa802e9 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -169,6 +169,7 @@ LIBBPF_0.0.4 {
 	global:
 		bpf_link__destroy;
 		bpf_object__load_xattr;
+		bpf_program__attach_perf_event;
 		btf_dump__dump_type;
 		btf_dump__free;
 		btf_dump__new;
-- 
cgit v1.2.3-59-g8ed1b


From b26500274767bacbe571c75926fee2489f91ba28 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:58:58 -0700
Subject: libbpf: add kprobe/uprobe attach API

Add ability to attach to kernel and user probes and retprobes.
Implementation depends on perf event support for kprobes/uprobes.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c   | 169 +++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |   7 ++
 tools/lib/bpf/libbpf.map |   2 +
 3 files changed, 178 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 54ce22f9f777..b5b49ddb152f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4021,6 +4021,175 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
 	return (struct bpf_link *)link;
 }
 
+/*
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
+ * given file using scanf format string fmt. If actual parsed value is
+ * negative, the result might be indistinguishable from error
+ */
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+	char buf[STRERR_BUFSIZE];
+	int err, ret;
+	FILE *f;
+
+	f = fopen(file, "r");
+	if (!f) {
+		err = -errno;
+		pr_debug("failed to open '%s': %s\n", file,
+			 libbpf_strerror_r(err, buf, sizeof(buf)));
+		return err;
+	}
+	err = fscanf(f, fmt, &ret);
+	if (err != 1) {
+		err = err == EOF ? -EIO : -errno;
+		pr_debug("failed to parse '%s': %s\n", file,
+			libbpf_strerror_r(err, buf, sizeof(buf)));
+		fclose(f);
+		return err;
+	}
+	fclose(f);
+	return ret;
+}
+
+static int determine_kprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_kprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
+				 uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	type = uprobe ? determine_uprobe_perf_type()
+		      : determine_kprobe_perf_type();
+	if (type < 0) {
+		pr_warning("failed to determine %s perf type: %s\n",
+			   uprobe ? "uprobe" : "kprobe",
+			   libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+	if (retprobe) {
+		int bit = uprobe ? determine_uprobe_retprobe_bit()
+				 : determine_kprobe_retprobe_bit();
+
+		if (bit < 0) {
+			pr_warning("failed to determine %s retprobe bit: %s\n",
+				   uprobe ? "uprobe" : "kprobe",
+				   libbpf_strerror_r(bit, errmsg,
+						     sizeof(errmsg)));
+			return bit;
+		}
+		attr.config |= 1 << bit;
+	}
+	attr.size = sizeof(attr);
+	attr.type = type;
+	attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */
+	attr.config2 = offset;		       /* kprobe_addr or probe_offset */
+
+	/* pid filter is meaningful only for uprobes */
+	pfd = syscall(__NR_perf_event_open, &attr,
+		      pid < 0 ? -1 : pid /* pid */,
+		      pid == -1 ? 0 : -1 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warning("%s perf_event_open() failed: %s\n",
+			   uprobe ? "uprobe" : "kprobe",
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+					    bool retprobe,
+					    const char *func_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+				    0 /* offset */, -1 /* pid */);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create %s '%s' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "kretprobe" : "kprobe", func_name,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to %s '%s': %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "kretprobe" : "kprobe", func_name,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+					    bool retprobe, pid_t pid,
+					    const char *binary_path,
+					    size_t func_offset)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(true /* uprobe */, retprobe,
+				    binary_path, func_offset, pid);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "uretprobe" : "uprobe",
+			   binary_path, func_offset,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
+			   bpf_program__title(prog, false),
+			   retprobe ? "uretprobe" : "uprobe",
+			   binary_path, func_offset,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1bf66c4a9330..bd767cc11967 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -171,6 +171,13 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+			   const char *func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+			   pid_t pid, const char *binary_path,
+			   size_t func_offset);
 
 struct bpf_insn;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 756f5aa802e9..57a40fb60718 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -169,7 +169,9 @@ LIBBPF_0.0.4 {
 	global:
 		bpf_link__destroy;
 		bpf_object__load_xattr;
+		bpf_program__attach_kprobe;
 		bpf_program__attach_perf_event;
+		bpf_program__attach_uprobe;
 		btf_dump__dump_type;
 		btf_dump__free;
 		btf_dump__new;
-- 
cgit v1.2.3-59-g8ed1b


From f6de59c17f111b3efc701c95b438e46245c4aeb1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:58:59 -0700
Subject: libbpf: add tracepoint attach API

Allow attaching BPF programs to kernel tracepoint BPF hooks specified by
category and name.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c   | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  4 +++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 84 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b5b49ddb152f..85174fcd99e0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4190,6 +4190,85 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
 	return link;
 }
 
+static int determine_tracepoint_id(const char *tp_category,
+				   const char *tp_name)
+{
+	char file[PATH_MAX];
+	int ret;
+
+	ret = snprintf(file, sizeof(file),
+		       "/sys/kernel/debug/tracing/events/%s/%s/id",
+		       tp_category, tp_name);
+	if (ret < 0)
+		return -errno;
+	if (ret >= sizeof(file)) {
+		pr_debug("tracepoint %s/%s path is too long\n",
+			 tp_category, tp_name);
+		return -E2BIG;
+	}
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int perf_event_open_tracepoint(const char *tp_category,
+				      const char *tp_name)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int tp_id, pfd, err;
+
+	tp_id = determine_tracepoint_id(tp_category, tp_name);
+	if (tp_id < 0) {
+		pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
+			   tp_category, tp_name,
+			   libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
+		return tp_id;
+	}
+
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.size = sizeof(attr);
+	attr.config = tp_id;
+
+	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n",
+			   tp_category, tp_name,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+						const char *tp_category,
+						const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_tracepoint(tp_category, tp_name);
+	if (pfd < 0) {
+		pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+			   bpf_program__title(prog, false),
+			   tp_category, tp_name,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (IS_ERR(link)) {
+		close(pfd);
+		err = PTR_ERR(link);
+		pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
+			   bpf_program__title(prog, false),
+			   tp_category, tp_name,
+			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return link;
+	}
+	return link;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index bd767cc11967..60611f4b4e1d 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -178,6 +178,10 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
 			   pid_t pid, const char *binary_path,
 			   size_t func_offset);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint(struct bpf_program *prog,
+			       const char *tp_category,
+			       const char *tp_name);
 
 struct bpf_insn;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 57a40fb60718..3c618b75ef65 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -171,6 +171,7 @@ LIBBPF_0.0.4 {
 		bpf_object__load_xattr;
 		bpf_program__attach_kprobe;
 		bpf_program__attach_perf_event;
+		bpf_program__attach_tracepoint;
 		bpf_program__attach_uprobe;
 		btf_dump__dump_type;
 		btf_dump__free;
-- 
cgit v1.2.3-59-g8ed1b


From 84bf5e1f4f174fc4f06bc747dafa1a7cfa4e9a67 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:59:00 -0700
Subject: libbpf: add raw tracepoint attach API

Add a wrapper utilizing bpf_link "infrastructure" to allow attaching BPF
programs to raw tracepoints.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c   | 39 +++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  3 +++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 43 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 85174fcd99e0..981fb5c3c6c9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4269,6 +4269,45 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
 	return link;
 }
 
+static int bpf_link__destroy_fd(struct bpf_link *link)
+{
+	struct bpf_link_fd *l = (void *)link;
+
+	return close(l->fd);
+}
+
+struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+						    const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warning("program '%s': can't attach before loaded\n",
+			   bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = malloc(sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->link.destroy = &bpf_link__destroy_fd;
+
+	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
+	if (pfd < 0) {
+		pfd = -errno;
+		free(link);
+		pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n",
+			   bpf_program__title(prog, false), tp_name,
+			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link->fd = pfd;
+	return (struct bpf_link *)link;
+}
+
 enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 60611f4b4e1d..f55933784f95 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -182,6 +182,9 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_tracepoint(struct bpf_program *prog,
 			       const char *tp_category,
 			       const char *tp_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+				   const char *tp_name);
 
 struct bpf_insn;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 3c618b75ef65..e6b7d4edbc93 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -171,6 +171,7 @@ LIBBPF_0.0.4 {
 		bpf_object__load_xattr;
 		bpf_program__attach_kprobe;
 		bpf_program__attach_perf_event;
+		bpf_program__attach_raw_tracepoint;
 		bpf_program__attach_tracepoint;
 		bpf_program__attach_uprobe;
 		btf_dump__dump_type;
-- 
cgit v1.2.3-59-g8ed1b


From 8c51b314d2eeae687bf8c28491c9f5e4f05f30f5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:59:01 -0700
Subject: selftests/bpf: switch test to new attach_perf_event API

Use new bpf_program__attach_perf_event() in test previously relying on
direct ioctl manipulations.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../bpf/prog_tests/stacktrace_build_id_nmi.c       | 31 +++++++++++-----------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 1c1a2f75f3d8..9557b7dfb782 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -17,6 +17,7 @@ static __u64 read_perf_max_sample_freq(void)
 void test_stacktrace_build_id_nmi(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *prog_name = "tracepoint/random/urandom_read";
 	const char *file = "./test_stacktrace_build_id.o";
 	int err, pmu_fd, prog_fd;
 	struct perf_event_attr attr = {
@@ -25,7 +26,9 @@ void test_stacktrace_build_id_nmi(void)
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 	};
 	__u32 key, previous_key, val, duration = 0;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
+	struct bpf_link *link;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -39,6 +42,10 @@ retry:
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
 		return;
 
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
+		goto close_prog;
+
 	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
 			 0 /* cpu 0 */, -1 /* group id */,
 			 0 /* flags */);
@@ -47,15 +54,12 @@ retry:
 		  pmu_fd, errno))
 		goto close_prog;
 
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
-		  err, errno))
-		goto close_pmu;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
-		  err, errno))
-		goto disable_pmu;
+	link = bpf_program__attach_perf_event(prog, pmu_fd);
+	if (CHECK(IS_ERR(link), "attach_perf_event",
+		  "err %ld\n", PTR_ERR(link))) {
+		close(pmu_fd);
+		goto close_prog;
+	}
 
 	/* find map fds */
 	control_map_fd = bpf_find_map(__func__, obj, "control_map");
@@ -134,8 +138,7 @@ retry:
 	 * try it one more time.
 	 */
 	if (build_id_matches < 1 && retry--) {
-		ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-		close(pmu_fd);
+		bpf_link__destroy(link);
 		bpf_object__close(obj);
 		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
 		       __func__);
@@ -154,11 +157,7 @@ retry:
 	 */
 
 disable_pmu:
-	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-
-close_pmu:
-	close(pmu_fd);
-
+	bpf_link__destroy(link);
 close_prog:
 	bpf_object__close(obj);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 1e8611bbdfc99faf8c9670dbbe582129a9a29973 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:59:02 -0700
Subject: selftests/bpf: add kprobe/uprobe selftests

Add tests verifying kprobe/kretprobe/uprobe/uretprobe APIs work as
expected.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/prog_tests/attach_probe.c        | 166 +++++++++++++++++++++
 .../selftests/bpf/progs/test_attach_probe.c        |  55 +++++++
 2 files changed, 221 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/attach_probe.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_attach_probe.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
new file mode 100644
index 000000000000..a4686395522c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+ssize_t get_base_addr() {
+	size_t start;
+	char buf[256];
+	FILE *f;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f)
+		return -errno;
+
+	while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) {
+		if (strcmp(buf, "r-xp") == 0) {
+			fclose(f);
+			return start;
+		}
+	}
+
+	fclose(f);
+	return -EINVAL;
+}
+
+#ifdef __x86_64__
+#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
+#else
+#define SYS_KPROBE_NAME "sys_nanosleep"
+#endif
+
+void test_attach_probe(void)
+{
+	const char *kprobe_name = "kprobe/sys_nanosleep";
+	const char *kretprobe_name = "kretprobe/sys_nanosleep";
+	const char *uprobe_name = "uprobe/trigger_func";
+	const char *uretprobe_name = "uretprobe/trigger_func";
+	const int kprobe_idx = 0, kretprobe_idx = 1;
+	const int uprobe_idx = 2, uretprobe_idx = 3;
+	const char *file = "./test_attach_probe.o";
+	struct bpf_program *kprobe_prog, *kretprobe_prog;
+	struct bpf_program *uprobe_prog, *uretprobe_prog;
+	struct bpf_object *obj;
+	int err, prog_fd, duration = 0, res;
+	struct bpf_link *kprobe_link = NULL;
+	struct bpf_link *kretprobe_link = NULL;
+	struct bpf_link *uprobe_link = NULL;
+	struct bpf_link *uretprobe_link = NULL;
+	int results_map_fd;
+	size_t uprobe_offset;
+	ssize_t base_addr;
+
+	base_addr = get_base_addr();
+	if (CHECK(base_addr < 0, "get_base_addr",
+		  "failed to find base addr: %zd", base_addr))
+		return;
+	uprobe_offset = (size_t)&get_base_addr - base_addr;
+
+	/* load programs */
+	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
+	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+		return;
+
+	kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
+	if (CHECK(!kprobe_prog, "find_probe",
+		  "prog '%s' not found\n", kprobe_name))
+		goto cleanup;
+	kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
+	if (CHECK(!kretprobe_prog, "find_probe",
+		  "prog '%s' not found\n", kretprobe_name))
+		goto cleanup;
+	uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name);
+	if (CHECK(!uprobe_prog, "find_probe",
+		  "prog '%s' not found\n", uprobe_name))
+		goto cleanup;
+	uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name);
+	if (CHECK(!uretprobe_prog, "find_probe",
+		  "prog '%s' not found\n", uretprobe_name))
+		goto cleanup;
+
+	/* load maps */
+	results_map_fd = bpf_find_map(__func__, obj, "results_map");
+	if (CHECK(results_map_fd < 0, "find_results_map",
+		  "err %d\n", results_map_fd))
+		goto cleanup;
+
+	kprobe_link = bpf_program__attach_kprobe(kprobe_prog,
+						 false /* retprobe */,
+						 SYS_KPROBE_NAME);
+	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
+		  "err %ld\n", PTR_ERR(kprobe_link))) {
+		kprobe_link = NULL;
+		goto cleanup;
+	}
+	kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog,
+						    true /* retprobe */,
+						    SYS_KPROBE_NAME);
+	if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
+		  "err %ld\n", PTR_ERR(kretprobe_link))) {
+		kretprobe_link = NULL;
+		goto cleanup;
+	}
+	uprobe_link = bpf_program__attach_uprobe(uprobe_prog,
+						 false /* retprobe */,
+						 0 /* self pid */,
+						 "/proc/self/exe",
+						 uprobe_offset);
+	if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
+		  "err %ld\n", PTR_ERR(uprobe_link))) {
+		uprobe_link = NULL;
+		goto cleanup;
+	}
+	uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog,
+						    true /* retprobe */,
+						    -1 /* any pid */,
+						    "/proc/self/exe",
+						    uprobe_offset);
+	if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
+		  "err %ld\n", PTR_ERR(uretprobe_link))) {
+		uretprobe_link = NULL;
+		goto cleanup;
+	}
+
+	/* trigger & validate kprobe && kretprobe */
+	usleep(1);
+
+	err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
+	if (CHECK(err, "get_kprobe_res",
+		  "failed to get kprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != kprobe_idx + 1, "check_kprobe_res",
+		  "wrong kprobe res: %d\n", res))
+		goto cleanup;
+
+	err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res);
+	if (CHECK(err, "get_kretprobe_res",
+		  "failed to get kretprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res",
+		  "wrong kretprobe res: %d\n", res))
+		goto cleanup;
+
+	/* trigger & validate uprobe & uretprobe */
+	get_base_addr();
+
+	err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
+	if (CHECK(err, "get_uprobe_res",
+		  "failed to get uprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != uprobe_idx + 1, "check_uprobe_res",
+		  "wrong uprobe res: %d\n", res))
+		goto cleanup;
+
+	err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res);
+	if (CHECK(err, "get_uretprobe_res",
+		  "failed to get uretprobe res: %d\n", err))
+		goto cleanup;
+	if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res",
+		  "wrong uretprobe res: %d\n", res))
+		goto cleanup;
+
+cleanup:
+	bpf_link__destroy(kprobe_link);
+	bpf_link__destroy(kretprobe_link);
+	bpf_link__destroy(uprobe_link);
+	bpf_link__destroy(uretprobe_link);
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
new file mode 100644
index 000000000000..7a7c5cd728c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct {
+	int type;
+	int max_entries;
+	int *key;
+	int *value;
+} results_map SEC(".maps") = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.max_entries = 4,
+};
+
+SEC("kprobe/sys_nanosleep")
+int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+{
+	const int key = 0, value = 1;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+SEC("kretprobe/sys_nanosleep")
+int handle_sys_getpid_return(struct pt_regs *ctx)
+{
+	const int key = 1, value = 2;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+SEC("uprobe/trigger_func")
+int handle_uprobe_entry(struct pt_regs *ctx)
+{
+	const int key = 2, value = 3;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+SEC("uretprobe/trigger_func")
+int handle_uprobe_return(struct pt_regs *ctx)
+{
+	const int key = 3, value = 4;
+
+	bpf_map_update_elem(&results_map, &key, &value, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
-- 
cgit v1.2.3-59-g8ed1b


From 1bdb34274acd4a772865289aeaa41696acb60110 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jul 2019 16:59:03 -0700
Subject: selftests/bpf: convert existing tracepoint tests to new APIs

Convert some existing tests that attach to tracepoints to use
bpf_program__attach_tracepoint API instead.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/prog_tests/stacktrace_build_id.c | 55 +++++-----------------
 .../selftests/bpf/prog_tests/stacktrace_map.c      | 43 ++++-------------
 .../bpf/prog_tests/stacktrace_map_raw_tp.c         | 15 ++++--
 3 files changed, 32 insertions(+), 81 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index 3aab2b083c71..ac44fda84833 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -4,11 +4,13 @@
 void test_stacktrace_build_id(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *prog_name = "tracepoint/random/urandom_read";
 	const char *file = "./test_stacktrace_build_id.o";
-	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
-	struct perf_event_attr attr = {};
+	int err, prog_fd, stack_trace_len;
 	__u32 key, previous_key, val, duration = 0;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
+	struct bpf_link *link = NULL;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -18,44 +20,16 @@ void test_stacktrace_build_id(void)
 retry:
 	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
-		goto out;
+		return;
 
-	/* Get the ID for the sched/sched_switch tracepoint */
-	snprintf(buf, sizeof(buf),
-		 "/sys/kernel/debug/tracing/events/random/urandom_read/id");
-	efd = open(buf, O_RDONLY, 0);
-	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
 		goto close_prog;
 
-	bytes = read(efd, buf, sizeof(buf));
-	close(efd);
-	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
-		  "read", "bytes %d errno %d\n", bytes, errno))
+	link = bpf_program__attach_tracepoint(prog, "random", "urandom_read");
+	if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
 		goto close_prog;
 
-	/* Open the perf event and attach bpf progrram */
-	attr.config = strtol(buf, NULL, 0);
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
-	attr.sample_period = 1;
-	attr.wakeup_events = 1;
-	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
-			 0 /* cpu 0 */, -1 /* group id */,
-			 0 /* flags */);
-	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
-		  pmu_fd, errno))
-		goto close_prog;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
-		  err, errno))
-		goto close_pmu;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
-		  err, errno))
-		goto disable_pmu;
-
 	/* find map fds */
 	control_map_fd = bpf_find_map(__func__, obj, "control_map");
 	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
@@ -133,8 +107,7 @@ retry:
 	 * try it one more time.
 	 */
 	if (build_id_matches < 1 && retry--) {
-		ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-		close(pmu_fd);
+		bpf_link__destroy(link);
 		bpf_object__close(obj);
 		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
 		       __func__);
@@ -152,14 +125,8 @@ retry:
 	      "err %d errno %d\n", err, errno);
 
 disable_pmu:
-	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-
-close_pmu:
-	close(pmu_fd);
+	bpf_link__destroy(link);
 
 close_prog:
 	bpf_object__close(obj);
-
-out:
-	return;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 2bfd50a0d6d1..fc539335c5b3 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -4,50 +4,26 @@
 void test_stacktrace_map(void)
 {
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *prog_name = "tracepoint/sched/sched_switch";
+	int err, prog_fd, stack_trace_len;
 	const char *file = "./test_stacktrace_map.o";
-	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
-	struct perf_event_attr attr = {};
 	__u32 key, val, duration = 0;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
-	char buf[256];
+	struct bpf_link *link;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
 		return;
 
-	/* Get the ID for the sched/sched_switch tracepoint */
-	snprintf(buf, sizeof(buf),
-		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
-	efd = open(buf, O_RDONLY, 0);
-	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
 		goto close_prog;
 
-	bytes = read(efd, buf, sizeof(buf));
-	close(efd);
-	if (bytes <= 0 || bytes >= sizeof(buf))
+	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
+	if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
 		goto close_prog;
 
-	/* Open the perf event and attach bpf progrram */
-	attr.config = strtol(buf, NULL, 0);
-	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
-	attr.sample_period = 1;
-	attr.wakeup_events = 1;
-	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
-			 0 /* cpu 0 */, -1 /* group id */,
-			 0 /* flags */);
-	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
-		  pmu_fd, errno))
-		goto close_prog;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
-	if (err)
-		goto disable_pmu;
-
-	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
-	if (err)
-		goto disable_pmu;
-
 	/* find map fds */
 	control_map_fd = bpf_find_map(__func__, obj, "control_map");
 	if (control_map_fd < 0)
@@ -96,8 +72,7 @@ void test_stacktrace_map(void)
 disable_pmu:
 	error_cnt++;
 disable_pmu_noerr:
-	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
-	close(pmu_fd);
+	bpf_link__destroy(link);
 close_prog:
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 1f8387d80fd7..fbfa8e76cf63 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -3,18 +3,25 @@
 
 void test_stacktrace_map_raw_tp(void)
 {
+	const char *prog_name = "tracepoint/sched/sched_switch";
 	int control_map_fd, stackid_hmap_fd, stackmap_fd;
 	const char *file = "./test_stacktrace_map.o";
-	int efd, err, prog_fd;
 	__u32 key, val, duration = 0;
+	int err, prog_fd;
+	struct bpf_program *prog;
 	struct bpf_object *obj;
+	struct bpf_link *link = NULL;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
 		return;
 
-	efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
-	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
+		goto close_prog;
+
+	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
+	if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
 		goto close_prog;
 
 	/* find map fds */
@@ -55,5 +62,7 @@ void test_stacktrace_map_raw_tp(void)
 close_prog:
 	error_cnt++;
 close_prog_noerr:
+	if (!IS_ERR_OR_NULL(link))
+		bpf_link__destroy(link);
 	bpf_object__close(obj);
 }
-- 
cgit v1.2.3-59-g8ed1b


From ef99b02b23ef4308057573f76750e661b5aea3e1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 5 Jul 2019 08:50:09 -0700
Subject: libbpf: capture value in BTF type info for BTF-defined map defs

Change BTF-defined map definitions to capture compile-time integer
values as part of BTF type definition, to avoid split of key/value type
information and actual type/size/flags initialization for maps.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 58 ++++++++++++++++++++++++--------------------------
 1 file changed, 28 insertions(+), 30 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 981fb5c3c6c9..2a08eb106221 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1029,40 +1029,40 @@ static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
 	}
 }
 
-static bool get_map_field_int(const char *map_name,
-			      const struct btf *btf,
+/*
+ * Fetch integer attribute of BTF map definition. Such attributes are
+ * represented using a pointer to an array, in which dimensionality of array
+ * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
+ * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
+ * type definition, while using only sizeof(void *) space in ELF data section.
+ */
+static bool get_map_field_int(const char *map_name, const struct btf *btf,
 			      const struct btf_type *def,
-			      const struct btf_member *m,
-			      const void *data, __u32 *res) {
+			      const struct btf_member *m, __u32 *res) {
 	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
 	const char *name = btf__name_by_offset(btf, m->name_off);
-	__u32 int_info = *(const __u32 *)(const void *)(t + 1);
+	const struct btf_array *arr_info;
+	const struct btf_type *arr_t;
 
-	if (BTF_INFO_KIND(t->info) != BTF_KIND_INT) {
-		pr_warning("map '%s': attr '%s': expected INT, got %u.\n",
+	if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) {
+		pr_warning("map '%s': attr '%s': expected PTR, got %u.\n",
 			   map_name, name, BTF_INFO_KIND(t->info));
 		return false;
 	}
-	if (t->size != 4 || BTF_INT_BITS(int_info) != 32 ||
-	    BTF_INT_OFFSET(int_info)) {
-		pr_warning("map '%s': attr '%s': expected 32-bit non-bitfield integer, "
-			   "got %u-byte (%d-bit) one with bit offset %d.\n",
-			   map_name, name, t->size, BTF_INT_BITS(int_info),
-			   BTF_INT_OFFSET(int_info));
-		return false;
-	}
-	if (BTF_INFO_KFLAG(def->info) && BTF_MEMBER_BITFIELD_SIZE(m->offset)) {
-		pr_warning("map '%s': attr '%s': bitfield is not supported.\n",
-			   map_name, name);
+
+	arr_t = btf__type_by_id(btf, t->type);
+	if (!arr_t) {
+		pr_warning("map '%s': attr '%s': type [%u] not found.\n",
+			   map_name, name, t->type);
 		return false;
 	}
-	if (m->offset % 32) {
-		pr_warning("map '%s': attr '%s': unaligned fields are not supported.\n",
-			   map_name, name);
+	if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) {
+		pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n",
+			   map_name, name, BTF_INFO_KIND(arr_t->info));
 		return false;
 	}
-
-	*res = *(const __u32 *)(data + m->offset / 8);
+	arr_info = (const void *)(arr_t + 1);
+	*res = arr_info->nelems;
 	return true;
 }
 
@@ -1075,7 +1075,6 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 	const struct btf_var_secinfo *vi;
 	const struct btf_var *var_extra;
 	const struct btf_member *m;
-	const void *def_data;
 	const char *map_name;
 	struct bpf_map *map;
 	int vlen, i;
@@ -1132,7 +1131,6 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
 		 map_name, map->sec_idx, map->sec_offset);
 
-	def_data = data->d_buf + vi->offset;
 	vlen = BTF_INFO_VLEN(def->info);
 	m = (const void *)(def + 1);
 	for (i = 0; i < vlen; i++, m++) {
@@ -1145,19 +1143,19 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 		}
 		if (strcmp(name, "type") == 0) {
 			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       def_data, &map->def.type))
+					       &map->def.type))
 				return -EINVAL;
 			pr_debug("map '%s': found type = %u.\n",
 				 map_name, map->def.type);
 		} else if (strcmp(name, "max_entries") == 0) {
 			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       def_data, &map->def.max_entries))
+					       &map->def.max_entries))
 				return -EINVAL;
 			pr_debug("map '%s': found max_entries = %u.\n",
 				 map_name, map->def.max_entries);
 		} else if (strcmp(name, "map_flags") == 0) {
 			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       def_data, &map->def.map_flags))
+					       &map->def.map_flags))
 				return -EINVAL;
 			pr_debug("map '%s': found map_flags = %u.\n",
 				 map_name, map->def.map_flags);
@@ -1165,7 +1163,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			__u32 sz;
 
 			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       def_data, &sz))
+					       &sz))
 				return -EINVAL;
 			pr_debug("map '%s': found key_size = %u.\n",
 				 map_name, sz);
@@ -1208,7 +1206,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			__u32 sz;
 
 			if (!get_map_field_int(map_name, obj->btf, def, m,
-					       def_data, &sz))
+					       &sz))
 				return -EINVAL;
 			pr_debug("map '%s': found value_size = %u.\n",
 				 map_name, sz);
-- 
cgit v1.2.3-59-g8ed1b


From 00acd00814527a1856c92beda17475205bddaba6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 5 Jul 2019 08:50:10 -0700
Subject: selftests/bpf: add __uint and __type macro for BTF-defined maps

Add simple __uint and __type macro that hide details of how type and
integer values are captured in BTF-defined maps.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/bpf_helpers.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 1a5b1accf091..5a3d92c8bec8 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -8,6 +8,9 @@
  */
 #define SEC(NAME) __attribute__((section(NAME), used))
 
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) val *name
+
 /* helper macro to print out debug messages */
 #define bpf_printk(fmt, ...)				\
 ({							\
-- 
cgit v1.2.3-59-g8ed1b


From bc7430cc8bfb51577e466a8ca02ad87375a70bde Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 5 Jul 2019 08:50:11 -0700
Subject: selftests/bpf: convert selftests using BTF-defined maps to new syntax

Convert all the existing selftests that are already using BTF-defined
maps to use new syntax (with no static data initialization).

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/bpf_flow.c       | 28 +++-----
 tools/testing/selftests/bpf/progs/netcnt_prog.c    | 20 +++---
 .../selftests/bpf/progs/socket_cookie_prog.c       | 13 ++--
 tools/testing/selftests/bpf/progs/test_btf_newkv.c | 13 ++--
 .../selftests/bpf/progs/test_get_stack_rawtp.c     | 39 ++++-------
 .../testing/selftests/bpf/progs/test_global_data.c | 37 ++++------
 tools/testing/selftests/bpf/progs/test_l4lb.c      | 65 +++++++----------
 .../selftests/bpf/progs/test_l4lb_noinline.c       | 65 +++++++----------
 tools/testing/selftests/bpf/progs/test_map_lock.c  | 26 +++----
 .../bpf/progs/test_select_reuseport_kern.c         | 67 +++++++-----------
 .../selftests/bpf/progs/test_send_signal_kern.c    | 26 +++----
 .../selftests/bpf/progs/test_sock_fields_kern.c    | 78 ++++++++-------------
 tools/testing/selftests/bpf/progs/test_spin_lock.c | 36 ++++------
 .../selftests/bpf/progs/test_stacktrace_build_id.c | 55 ++++++---------
 .../selftests/bpf/progs/test_stacktrace_map.c      | 52 +++++---------
 .../testing/selftests/bpf/progs/test_tcp_estats.c  | 13 ++--
 .../testing/selftests/bpf/progs/test_tcpbpf_kern.c | 26 +++----
 .../selftests/bpf/progs/test_tcpnotify_kern.c      | 28 +++-----
 tools/testing/selftests/bpf/progs/test_xdp.c       | 26 +++----
 .../selftests/bpf/progs/test_xdp_noinline.c        | 81 +++++++++-------------
 20 files changed, 300 insertions(+), 494 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 849f42e548b5..5ae485a6af3f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -58,26 +58,18 @@ struct frag_hdr {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 key_size;
-	__u32 value_size;
-} jmp_table SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PROG_ARRAY,
-	.max_entries = 8,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-};
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 8);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct bpf_flow_keys *value;
-} last_dissection SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct bpf_flow_keys);
+} last_dissection SEC(".maps");
 
 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
 					    int ret)
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index a25c82a5b7c8..38a997852cad 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -11,20 +11,16 @@
 #define NS_PER_SEC	1000000000
 
 struct {
-	__u32 type;
-	struct bpf_cgroup_storage_key *key;
-	struct percpu_net_cnt *value;
-} percpu_netcnt SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
-};
+	__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct percpu_net_cnt);
+} percpu_netcnt SEC(".maps");
 
 struct {
-	__u32 type;
-	struct bpf_cgroup_storage_key *key;
-	struct net_cnt *value;
-} netcnt SEC(".maps") = {
-	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
-};
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct net_cnt);
+} netcnt SEC(".maps");
 
 SEC("cgroup/skb")
 int bpf_nextcnt(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
index 6aabb681fb9a..e4440fdd94cb 100644
--- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
@@ -13,14 +13,11 @@ struct socket_cookie {
 };
 
 struct {
-	__u32 type;
-	__u32 map_flags;
-	int *key;
-	struct socket_cookie *value;
-} socket_cookies SEC(".maps") = {
-	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.map_flags = BPF_F_NO_PREALLOC,
-};
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct socket_cookie);
+} socket_cookies SEC(".maps");
 
 SEC("cgroup/connect6")
 int set_cookie(struct bpf_sock_addr *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 28c16bb583b6..5ee3622ddebb 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -21,14 +21,11 @@ struct bpf_map_def SEC("maps") btf_map_legacy = {
 BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
 
 struct {
-	int *key;
-	struct ipv_counts *value;
-	unsigned int type;
-	unsigned int max_entries;
-} btf_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 4,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, int);
+	__type(value, struct ipv_counts);
+} btf_map SEC(".maps");
 
 struct dummy_tracepoint_args {
 	unsigned long long pad;
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index aaa6ec250e15..d06b47a09097 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -16,26 +16,18 @@ struct stack_trace_t {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 key_size;
-	__u32 value_size;
-} perfmap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.max_entries = 2,
-	.key_size = sizeof(int),
-	.value_size = sizeof(__u32),
-};
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 2);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(__u32));
+} perfmap SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct stack_trace_t *value;
-} stackdata_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct stack_trace_t);
+} stackdata_map SEC(".maps");
 
 /* Allocate per-cpu space twice the needed. For the code below
  *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
@@ -56,14 +48,11 @@ struct {
  * This is an acceptable workaround since there is one entry here.
  */
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
 	__u64 (*value)[2 * MAX_STACK_RAWTP];
-} rawdata_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.max_entries = 1,
-};
+} rawdata_map SEC(".maps");
 
 SEC("tracepoint/raw_syscalls/sys_enter")
 int bpf_prog1(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 866cc7ddbe43..32a6073acb99 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -8,24 +8,18 @@
 #include "bpf_helpers.h"
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u64 *value;
-} result_number SEC(".maps") = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.max_entries	= 11,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 11);
+	__type(key, __u32);
+	__type(value, __u64);
+} result_number SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 5);
+	__type(key, __u32);
 	const char (*value)[32];
-} result_string SEC(".maps") = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.max_entries	= 5,
-};
+} result_string SEC(".maps");
 
 struct foo {
 	__u8  a;
@@ -34,14 +28,11 @@ struct foo {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct foo *value;
-} result_struct SEC(".maps") = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.max_entries	= 5,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 5);
+	__type(key, __u32);
+	__type(value, struct foo);
+} result_struct SEC(".maps");
 
 /* Relocation tests for __u64s. */
 static       __u64 num0;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 848cbb90f581..1d652ee8e73d 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -170,54 +170,39 @@ struct eth_hdr {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	struct vip *key;
-	struct vip_meta *value;
-} vip_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = MAX_VIPS,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, struct vip);
+	__type(value, struct vip_meta);
+} vip_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} ch_rings SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = CH_RINGS_SIZE,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CH_RINGS_SIZE);
+	__type(key, __u32);
+	__type(value, __u32);
+} ch_rings SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct real_definition *value;
-} reals SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = MAX_REALS,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, MAX_REALS);
+	__type(key, __u32);
+	__type(value, struct real_definition);
+} reals SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct vip_stats *value;
-} stats SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.max_entries = MAX_VIPS,
-};
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, __u32);
+	__type(value, struct vip_stats);
+} stats SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct ctl_value *value;
-} ctl_array SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = CTL_MAP_SIZE,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CTL_MAP_SIZE);
+	__type(key, __u32);
+	__type(value, struct ctl_value);
+} ctl_array SEC(".maps");
 
 static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
 					     bool ipv6)
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index c63ecf3ca573..2e4efe70b1e5 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -166,54 +166,39 @@ struct eth_hdr {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	struct vip *key;
-	struct vip_meta *value;
-} vip_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = MAX_VIPS,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, struct vip);
+	__type(value, struct vip_meta);
+} vip_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} ch_rings SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = CH_RINGS_SIZE,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CH_RINGS_SIZE);
+	__type(key, __u32);
+	__type(value, __u32);
+} ch_rings SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct real_definition *value;
-} reals SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = MAX_REALS,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, MAX_REALS);
+	__type(key, __u32);
+	__type(value, struct real_definition);
+} reals SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct vip_stats *value;
-} stats SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.max_entries = MAX_VIPS,
-};
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, MAX_VIPS);
+	__type(key, __u32);
+	__type(value, struct vip_stats);
+} stats SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct ctl_value *value;
-} ctl_array SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = CTL_MAP_SIZE,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, CTL_MAP_SIZE);
+	__type(key, __u32);
+	__type(value, struct ctl_value);
+} ctl_array SEC(".maps");
 
 static __u32 get_packet_hash(struct packet_description *pckt,
 			     bool ipv6)
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index 40d9c2853393..bb7ce35f691b 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -12,14 +12,11 @@ struct hmap_elem {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct hmap_elem *value;
-} hash_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct hmap_elem);
+} hash_map SEC(".maps");
 
 struct array_elem {
 	struct bpf_spin_lock lock;
@@ -27,14 +24,11 @@ struct array_elem {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	int *key;
-	struct array_elem *value;
-} array_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct array_elem);
+} array_map SEC(".maps");
 
 SEC("map_lock_demo")
 int bpf_map_lock_test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 435a9527733e..ea7d84f01235 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -22,56 +22,39 @@ int _version SEC("version") = 1;
 #endif
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 key_size;
-	__u32 value_size;
-} outer_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
-	.max_entries = 1,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} outer_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} result_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = NR_RESULTS,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, NR_RESULTS);
+	__type(key, __u32);
+	__type(value, __u32);
+} result_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	int *value;
-} tmp_index_ovr_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, int);
+} tmp_index_ovr_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} linum_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} linum_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct data_check *value;
-} data_check_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct data_check);
+} data_check_map SEC(".maps");
 
 #define GOTO_DONE(_result) ({			\
 	result = (_result);			\
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index 6ac68be5d68b..0e6be01157e6 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -5,24 +5,18 @@
 #include "bpf_helpers.h"
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u64 *value;
-} info_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} info_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u64 *value;
-} status_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} status_map SEC(".maps");
 
 SEC("send_signal_demo")
 int bpf_send_signal_test(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
index c3d383d650cb..a47b003623ef 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
@@ -28,44 +28,32 @@ enum bpf_linum_array_idx {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct sockaddr_in6 *value;
-} addr_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = __NR_BPF_ADDR_ARRAY_IDX,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, struct sockaddr_in6);
+} addr_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct bpf_sock *value;
-} sock_result_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, struct bpf_sock);
+} sock_result_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct bpf_tcp_sock *value;
-} tcp_sock_result_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = __NR_BPF_RESULT_ARRAY_IDX,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, struct bpf_tcp_sock);
+} tcp_sock_result_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} linum_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = __NR_BPF_LINUM_ARRAY_IDX,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
+	__type(key, __u32);
+	__type(value, __u32);
+} linum_map SEC(".maps");
 
 struct bpf_spinlock_cnt {
 	struct bpf_spin_lock lock;
@@ -73,24 +61,18 @@ struct bpf_spinlock_cnt {
 };
 
 struct {
-	__u32 type;
-	__u32 map_flags;
-	int *key;
-	struct bpf_spinlock_cnt *value;
-} sk_pkt_out_cnt SEC(".maps") = {
-	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.map_flags = BPF_F_NO_PREALLOC,
-};
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct bpf_spinlock_cnt);
+} sk_pkt_out_cnt SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 map_flags;
-	int *key;
-	struct bpf_spinlock_cnt *value;
-} sk_pkt_out_cnt10 SEC(".maps") = {
-	.type = BPF_MAP_TYPE_SK_STORAGE,
-	.map_flags = BPF_F_NO_PREALLOC,
-};
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct bpf_spinlock_cnt);
+} sk_pkt_out_cnt10 SEC(".maps");
 
 static bool is_loopback6(__u32 *a6)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 0a77ae36d981..a43b999c8da2 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -11,14 +11,11 @@ struct hmap_elem {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	int *key;
-	struct hmap_elem *value;
-} hmap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct hmap_elem);
+} hmap SEC(".maps");
 
 struct cls_elem {
 	struct bpf_spin_lock lock;
@@ -26,12 +23,10 @@ struct cls_elem {
 };
 
 struct {
-	__u32 type;
-	struct bpf_cgroup_storage_key *key;
-	struct cls_elem *value;
-} cls_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
-};
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, struct bpf_cgroup_storage_key);
+	__type(value, struct cls_elem);
+} cls_map SEC(".maps");
 
 struct bpf_vqueue {
 	struct bpf_spin_lock lock;
@@ -42,14 +37,11 @@ struct bpf_vqueue {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	int *key;
-	struct bpf_vqueue *value;
-} vqueue SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, struct bpf_vqueue);
+} vqueue SEC(".maps");
 
 #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
 
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index fcf2280bb60c..bbfc8337b6f0 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -9,51 +9,36 @@
 #endif
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} control_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} control_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} stackid_hmap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = 16384,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, __u32);
+} stackid_hmap SEC(".maps");
 
 typedef struct bpf_stack_build_id stack_trace_t[PERF_MAX_STACK_DEPTH];
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 map_flags;
-	__u32 key_size;
-	__u32 value_size;
-} stackmap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.max_entries = 128,
-	.map_flags = BPF_F_STACK_BUILD_ID,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(stack_trace_t),
-};
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 128);
+	__uint(map_flags, BPF_F_STACK_BUILD_ID);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 128);
+	__type(key, __u32);
 	/* there seems to be a bug in kernel not handling typedef properly */
 	struct bpf_stack_build_id (*value)[PERF_MAX_STACK_DEPTH];
-} stack_amap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 128,
-};
+} stack_amap SEC(".maps");
 
 /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
 struct random_urandom_args {
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index 7ad09adbf648..803c15dc109d 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -9,48 +9,34 @@
 #endif
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} control_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 1,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} control_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} stackid_hmap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = 16384,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
+	__type(value, __u32);
+} stackid_hmap SEC(".maps");
 
 typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 key_size;
-	__u32 value_size;
-} stackmap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.max_entries = 16384,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(stack_trace_t),
-};
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16384);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(stack_trace_t));
+} stackmap SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 16384);
+	__type(key, __u32);
 	__u64 (*value)[PERF_MAX_STACK_DEPTH];
-} stack_amap SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 16384,
-};
+} stack_amap SEC(".maps");
 
 /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index df98f7e32832..c8c595da38d4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -149,14 +149,11 @@ struct tcp_estats_basic_event {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct tcp_estats_basic_event *value;
-} ev_record_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = 1024,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1024);
+	__type(key, __u32);
+	__type(value, struct tcp_estats_basic_event);
+} ev_record_map SEC(".maps");
 
 struct dummy_tracepoint_args {
 	unsigned long long pad;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 38e10c9fd996..2e233613d1fc 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -15,24 +15,18 @@
 #include "test_tcpbpf.h"
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct tcpbpf_globals *value;
-} global_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 4,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, __u32);
+	__type(value, struct tcpbpf_globals);
+} global_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	int *value;
-} sockopt_results SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 2,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, int);
+} sockopt_results SEC(".maps");
 
 static inline void update_event_map(int event)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index d073d37d4e27..08346e7765d5 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -15,26 +15,18 @@
 #include "test_tcpnotify.h"
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct tcpnotify_globals *value;
-} global_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 4,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, __u32);
+	__type(value, struct tcpnotify_globals);
+} global_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 key_size;
-	__u32 value_size;
-} perf_event_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.max_entries = 2,
-	.key_size = sizeof(int),
-	.value_size = sizeof(__u32),
-};
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 2);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(__u32));
+} perf_event_map SEC(".maps");
 
 int _version SEC("version") = 1;
 
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index ec3d2c1c8cf9..0941c655b07b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -23,24 +23,18 @@
 int _version SEC("version") = 1;
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u64 *value;
-} rxcnt SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.max_entries = 256,
-};
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 256);
+	__type(key, __u32);
+	__type(value, __u64);
+} rxcnt SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	struct vip *key;
-	struct iptnl_info *value;
-} vip2tnl SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = MAX_IPTNL_ENTRIES,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_IPTNL_ENTRIES);
+	__type(key, struct vip);
+	__type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
 
 static __always_inline void count_tx(__u32 protocol)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index d2eddb5553d1..dad8a7e33eaa 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -164,66 +164,47 @@ struct lb_stats {
 };
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	struct vip_definition *key;
-	struct vip_meta *value;
-} vip_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_HASH,
-	.max_entries = 512,
-};
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 512);
+	__type(key, struct vip_definition);
+	__type(value, struct vip_meta);
+} vip_map SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 map_flags;
-	struct flow_key *key;
-	struct real_pos_lru *value;
-} lru_cache SEC(".maps") = {
-	.type = BPF_MAP_TYPE_LRU_HASH,
-	.max_entries = 300,
-	.map_flags = 1U << 1,
-};
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(max_entries, 300);
+	__uint(map_flags, 1U << 1);
+	__type(key, struct flow_key);
+	__type(value, struct real_pos_lru);
+} lru_cache SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	__u32 *value;
-} ch_rings SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 12 * 655,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 12 * 655);
+	__type(key, __u32);
+	__type(value, __u32);
+} ch_rings SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct real_definition *value;
-} reals SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 40,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 40);
+	__type(key, __u32);
+	__type(value, struct real_definition);
+} reals SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct lb_stats *value;
-} stats SEC(".maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.max_entries = 515,
-};
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 515);
+	__type(key, __u32);
+	__type(value, struct lb_stats);
+} stats SEC(".maps");
 
 struct {
-	__u32 type;
-	__u32 max_entries;
-	__u32 *key;
-	struct ctl_value *value;
-} ctl_array SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 16,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 16);
+	__type(key, __u32);
+	__type(value, struct ctl_value);
+} ctl_array SEC(".maps");
 
 struct eth_hdr {
 	unsigned char eth_dest[6];
-- 
cgit v1.2.3-59-g8ed1b


From 1639b17c72fa7ad977ccd0ad6c673e3f7048723b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 5 Jul 2019 08:50:12 -0700
Subject: selftests/bpf: convert legacy BPF maps to BTF-defined ones

Convert selftests that were originally left out and new ones added
recently to consistently use BTF-defined maps.

Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../selftests/bpf/progs/get_cgroup_id_kern.c       |  26 ++---
 tools/testing/selftests/bpf/progs/pyperf.h         |  90 ++++++++---------
 .../selftests/bpf/progs/sockmap_verdict_prog.c     |  48 ++++-----
 tools/testing/selftests/bpf/progs/strobemeta.h     |  68 ++++++-------
 .../testing/selftests/bpf/progs/test_map_in_map.c  |  30 +++---
 tools/testing/selftests/bpf/progs/test_obj_id.c    |  12 +--
 tools/testing/selftests/bpf/progs/test_xdp_loop.c  |  26 ++---
 .../testing/selftests/bpf/progs/xdp_redirect_map.c |  12 +--
 tools/testing/selftests/bpf/progs/xdping_kern.c    |  12 +--
 tools/testing/selftests/bpf/test_queue_stack_map.h |  30 +++---
 tools/testing/selftests/bpf/test_sockmap_kern.h    | 110 ++++++++++-----------
 11 files changed, 228 insertions(+), 236 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
index 014dba10b8a5..16c54ade6888 100644
--- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
@@ -4,19 +4,19 @@
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") cg_ids = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") pidmap = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} cg_ids SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} pidmap SEC(".maps");
 
 SEC("tracepoint/syscalls/sys_enter_nanosleep")
 int trace(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index abf6224649be..003fe106fc70 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -58,14 +58,6 @@ typedef struct {
 } Event;
 
 
-struct bpf_elf_map {
-	__u32 type;
-	__u32 size_key;
-	__u32 size_value;
-	__u32 max_elem;
-	__u32 flags;
-};
-
 typedef int pid_t;
 
 typedef struct {
@@ -118,47 +110,47 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData,
 	return true;
 }
 
-struct bpf_elf_map SEC("maps") pidmap = {
-	.type = BPF_MAP_TYPE_HASH,
-	.size_key = sizeof(int),
-	.size_value = sizeof(PidData),
-	.max_elem = 1,
-};
-
-struct bpf_elf_map SEC("maps") eventmap = {
-	.type = BPF_MAP_TYPE_HASH,
-	.size_key = sizeof(int),
-	.size_value = sizeof(Event),
-	.max_elem = 1,
-};
-
-struct bpf_elf_map SEC("maps") symbolmap = {
-	.type = BPF_MAP_TYPE_HASH,
-	.size_key = sizeof(Symbol),
-	.size_value = sizeof(int),
-	.max_elem = 1,
-};
-
-struct bpf_elf_map SEC("maps") statsmap = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.size_key = sizeof(Stats),
-	.size_value = sizeof(int),
-	.max_elem = 1,
-};
-
-struct bpf_elf_map SEC("maps") perfmap = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.size_key = sizeof(int),
-	.size_value = sizeof(int),
-	.max_elem = 32,
-};
-
-struct bpf_elf_map SEC("maps") stackmap = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.size_key = sizeof(int),
-	.size_value = sizeof(long long) * 127,
-	.max_elem = 1000,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, PidData);
+} pidmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, Event);
+} eventmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, Symbol);
+	__type(value, int);
+} symbolmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, Stats);
+} statsmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 32);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} perfmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 1000);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(long long) * 127);
+} stackmap SEC(".maps");
 
 static __always_inline int __on_event(struct pt_regs *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index d85c874ef25e..433e23918a62 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -4,33 +4,33 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def SEC("maps") sock_map_rx = {
-	.type = BPF_MAP_TYPE_SOCKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_rx SEC(".maps");
 
-struct bpf_map_def SEC("maps") sock_map_tx = {
-	.type = BPF_MAP_TYPE_SOCKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_tx SEC(".maps");
 
-struct bpf_map_def SEC("maps") sock_map_msg = {
-	.type = BPF_MAP_TYPE_SOCKMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_msg SEC(".maps");
 
-struct bpf_map_def SEC("maps") sock_map_break = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map_break SEC(".maps");
 
 SEC("sk_skb2")
 int bpf_prog2(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 553bc3b62e89..8a399bdfd920 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -204,40 +204,40 @@ struct strobelight_bpf_sample {
 	char dummy_safeguard;
 };
 
-struct bpf_map_def SEC("maps") samples = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 32,
-};
-
-struct bpf_map_def SEC("maps") stacks_0 = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(uint32_t),
-	.value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
-	.max_entries = 16,
-};
-
-struct bpf_map_def SEC("maps") stacks_1 = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(uint32_t),
-	.value_size = sizeof(uint64_t) * PERF_MAX_STACK_DEPTH,
-	.max_entries = 16,
-};
-
-struct bpf_map_def SEC("maps") sample_heap = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(uint32_t),
-	.value_size = sizeof(struct strobelight_bpf_sample),
-	.max_entries = 1,
-};
-
-struct bpf_map_def SEC("maps") strobemeta_cfgs = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(pid_t),
-	.value_size = sizeof(struct strobemeta_cfg),
-	.max_entries = STROBE_MAX_CFGS,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 32);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} samples SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
+} stacks_0 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(max_entries, 16);
+	__uint(key_size, sizeof(uint32_t));
+	__uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
+} stacks_1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, uint32_t);
+	__type(value, struct strobelight_bpf_sample);
+} sample_heap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, STROBE_MAX_CFGS);
+	__type(key, pid_t);
+	__type(value, struct strobemeta_cfg);
+} strobemeta_cfgs SEC(".maps");
 
 /* Type for the dtv.  */
 /* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 2985f262846e..113226115365 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -5,23 +5,23 @@
 #include <linux/types.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") mim_array = {
-	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
-	.key_size = sizeof(int),
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(map_flags, 0);
+	__uint(key_size, sizeof(__u32));
 	/* must be sizeof(__u32) for map in map */
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-	.map_flags = 0,
-};
-
-struct bpf_map_def SEC("maps") mim_hash = {
-	.type = BPF_MAP_TYPE_HASH_OF_MAPS,
-	.key_size = sizeof(int),
+	__uint(value_size, sizeof(__u32));
+} mim_array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(map_flags, 0);
+	__uint(key_size, sizeof(int));
 	/* must be sizeof(__u32) for map in map */
-	.value_size = sizeof(__u32),
-	.max_entries = 1,
-	.map_flags = 0,
-};
+	__uint(value_size, sizeof(__u32));
+} mim_hash SEC(".maps");
 
 SEC("xdp_mimtest")
 int xdp_mimtest0(struct xdp_md *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index 726340fa6fe0..3d30c02bdae9 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -13,12 +13,12 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def SEC("maps") test_map_id = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} test_map_id SEC(".maps");
 
 SEC("test_obj_id_dummy")
 int test_obj_id(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index 7fa4677df22e..97175f73c3fe 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -18,19 +18,19 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def SEC("maps") rxcnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = 256,
-};
-
-struct bpf_map_def SEC("maps") vip2tnl = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct vip),
-	.value_size = sizeof(struct iptnl_info),
-	.max_entries = MAX_IPTNL_ENTRIES,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 256);
+	__type(key, __u32);
+	__type(value, __u64);
+} rxcnt SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, MAX_IPTNL_ENTRIES);
+	__type(key, struct vip);
+	__type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
 
 static __always_inline void count_tx(__u32 protocol)
 {
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
index e87a985b9df9..1c5f298d7196 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -3,12 +3,12 @@
 #include <linux/bpf.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") tx_port = {
-	.type = BPF_MAP_TYPE_DEVMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 8,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(max_entries, 8);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} tx_port SEC(".maps");
 
 SEC("redirect_map_0")
 int xdp_redirect_map_0(struct xdp_md *xdp)
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 87393e7c667c..112a2857f4e2 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -17,12 +17,12 @@
 
 #include "xdping.h"
 
-struct bpf_map_def SEC("maps") ping_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct pinginfo),
-	.max_entries = 256,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 256);
+	__type(key, __u32);
+	__type(value, struct pinginfo);
+} ping_map SEC(".maps");
 
 static __always_inline void swap_src_dst_mac(void *data)
 {
diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h
index 295b9b3bc5c7..0e014d3b2b36 100644
--- a/tools/testing/selftests/bpf/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/test_queue_stack_map.h
@@ -10,21 +10,21 @@
 
 int _version SEC("version") = 1;
 
-struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = {
-	.type = MAP_TYPE,
-	.key_size = 0,
-	.value_size = sizeof(__u32),
-	.max_entries = 32,
-	.map_flags = 0,
-};
-
-struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = {
-	.type = MAP_TYPE,
-	.key_size = 0,
-	.value_size = sizeof(__u32),
-	.max_entries = 32,
-	.map_flags = 0,
-};
+struct {
+	__uint(type, MAP_TYPE);
+	__uint(max_entries, 32);
+	__uint(map_flags, 0);
+	__uint(key_size, 0);
+	__uint(value_size, sizeof(__u32));
+} map_in SEC(".maps");
+
+struct {
+	__uint(type, MAP_TYPE);
+	__uint(max_entries, 32);
+	__uint(map_flags, 0);
+	__uint(key_size, 0);
+	__uint(value_size, sizeof(__u32));
+} map_out SEC(".maps");
 
 SEC("test")
 int _test(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index 4e7d3da21357..d008b41b7d8d 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -28,61 +28,61 @@
  * are established and verdicts are decided.
  */
 
-struct bpf_map_def SEC("maps") sock_map = {
-	.type = TEST_MAP_TYPE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_txmsg = {
-	.type = TEST_MAP_TYPE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_map_redir = {
-	.type = TEST_MAP_TYPE,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 20,
-};
-
-struct bpf_map_def SEC("maps") sock_apply_bytes = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_cork_bytes = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_bytes = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 6
-};
-
-struct bpf_map_def SEC("maps") sock_redir_flags = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
-
-struct bpf_map_def SEC("maps") sock_skb_opts = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1
-};
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map SEC(".maps");
+
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_txmsg SEC(".maps");
+
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_redir SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_apply_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_cork_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 6);
+	__type(key, int);
+	__type(value, int);
+} sock_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_redir_flags SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_skb_opts SEC(".maps");
 
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
-- 
cgit v1.2.3-59-g8ed1b


From ba95c7452439756d4f6dceb5a188b7c31dbbe5b6 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Fri, 5 Jul 2019 18:54:33 +0100
Subject: tools: bpftool: add "prog run" subcommand to test-run programs

Add a new "bpftool prog run" subcommand to run a loaded program on input
data (and possibly with input context) passed by the user.

Print output data (and output context if relevant) into a file or into
the console. Print return value and duration for the test run into the
console.

A "repeat" argument can be passed to run the program several times in a
row.

The command does not perform any kind of verification based on program
type (Is this program type allowed to use an input context?) or on data
consistency (Can I work with empty input data?), this is left to the
kernel.

Example invocation:

    # perl -e 'print "\x0" x 14' | ./bpftool prog run \
            pinned /sys/fs/bpf/sample_ret0 \
            data_in - data_out - repeat 5
    0000000 0000 0000 0000 0000 0000 0000 0000      | ........ ......
    Return value: 0, duration (average): 260ns

When one of data_in or ctx_in is "-", bpftool reads from standard input,
in binary format. Other formats (JSON, hexdump) might be supported (via
an optional command line keyword like "data_fmt_in") in the future if
relevant, but this would require doing more parsing in bpftool.

v2:
- Fix argument names for function check_single_stdin(). (Yonghong)

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst |  34 +++
 tools/bpf/bpftool/bash-completion/bpftool        |  28 +-
 tools/bpf/bpftool/main.c                         |  29 ++
 tools/bpf/bpftool/main.h                         |   1 +
 tools/bpf/bpftool/prog.c                         | 348 ++++++++++++++++++++++-
 tools/include/linux/sizes.h                      |  48 ++++
 6 files changed, 485 insertions(+), 3 deletions(-)
 create mode 100644 tools/include/linux/sizes.h

(limited to 'tools')

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 1df637f85f94..7a374b3c851d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -29,6 +29,7 @@ PROG COMMANDS
 |	**bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog tracelog**
+|	**bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
 |	**bpftool** **prog help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -146,6 +147,39 @@ DESCRIPTION
 		  streaming data from BPF programs to user space, one can use
 		  perf events (see also **bpftool-map**\ (8)).
 
+	**bpftool prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
+		  Run BPF program *PROG* in the kernel testing infrastructure
+		  for BPF, meaning that the program works on the data and
+		  context provided by the user, and not on actual packets or
+		  monitored functions etc. Return value and duration for the
+		  test run are printed out to the console.
+
+		  Input data is read from the *FILE* passed with **data_in**.
+		  If this *FILE* is "**-**", input data is read from standard
+		  input. Input context, if any, is read from *FILE* passed with
+		  **ctx_in**. Again, "**-**" can be used to read from standard
+		  input, but only if standard input is not already in use for
+		  input data. If a *FILE* is passed with **data_out**, output
+		  data is written to that file. Similarly, output context is
+		  written to the *FILE* passed with **ctx_out**. For both
+		  output flows, "**-**" can be used to print to the standard
+		  output (as plain text, or JSON if relevant option was
+		  passed). If output keywords are omitted, output data and
+		  context are discarded. Keywords **data_size_out** and
+		  **ctx_size_out** are used to pass the size (in bytes) for the
+		  output buffers to the kernel, although the default of 32 kB
+		  should be more than enough for most cases.
+
+		  Keyword **repeat** is used to indicate the number of
+		  consecutive runs to perform. Note that output data and
+		  context printed to files correspond to the last of those
+		  runs. The duration printed out at the end of the runs is an
+		  average over all runs performed by the command.
+
+		  Not all program types support test run. Among those which do,
+		  not all of them can take the **ctx_in**/**ctx_out**
+		  arguments. bpftool does not perform checks on program types.
+
 	**bpftool prog help**
 		  Print short help message.
 
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index ba37095e1f62..965a8658cca3 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -408,10 +408,34 @@ _bpftool()
                 tracelog)
                     return 0
                     ;;
+                run)
+                    if [[ ${#words[@]} -lt 5 ]]; then
+                        _filedir
+                        return 0
+                    fi
+                    case $prev in
+                        id)
+                            _bpftool_get_prog_ids
+                            return 0
+                            ;;
+                        data_in|data_out|ctx_in|ctx_out)
+                            _filedir
+                            return 0
+                            ;;
+                        repeat|data_size_out|ctx_size_out)
+                            return 0
+                            ;;
+                        *)
+                            _bpftool_once_attr 'data_in data_out data_size_out \
+                                ctx_in ctx_out ctx_size_out repeat'
+                            return 0
+                            ;;
+                    esac
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'dump help pin attach detach load \
-                            show list tracelog' -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'dump help pin attach detach \
+                            load show list tracelog run' -- "$cur" ) )
                     ;;
             esac
             ;;
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 4879f6395c7e..e916ff25697f 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -117,6 +117,35 @@ bool is_prefix(const char *pfx, const char *str)
 	return !memcmp(str, pfx, strlen(pfx));
 }
 
+/* Last argument MUST be NULL pointer */
+int detect_common_prefix(const char *arg, ...)
+{
+	unsigned int count = 0;
+	const char *ref;
+	char msg[256];
+	va_list ap;
+
+	snprintf(msg, sizeof(msg), "ambiguous prefix: '%s' could be '", arg);
+	va_start(ap, arg);
+	while ((ref = va_arg(ap, const char *))) {
+		if (!is_prefix(arg, ref))
+			continue;
+		count++;
+		if (count > 1)
+			strncat(msg, "' or '", sizeof(msg) - strlen(msg) - 1);
+		strncat(msg, ref, sizeof(msg) - strlen(msg) - 1);
+	}
+	va_end(ap);
+	strncat(msg, "'", sizeof(msg) - strlen(msg) - 1);
+
+	if (count >= 2) {
+		p_err(msg);
+		return -1;
+	}
+
+	return 0;
+}
+
 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
 {
 	unsigned char *data = arg;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 9c5d9c80f71e..3ef0d9051e10 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -101,6 +101,7 @@ void p_err(const char *fmt, ...);
 void p_info(const char *fmt, ...);
 
 bool is_prefix(const char *pfx, const char *str);
+int detect_common_prefix(const char *arg, ...);
 void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
 void usage(void) __noreturn;
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9b0db5d14e31..66f04a4846a5 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -15,6 +15,7 @@
 #include <sys/stat.h>
 
 #include <linux/err.h>
+#include <linux/sizes.h>
 
 #include <bpf.h>
 #include <btf.h>
@@ -748,6 +749,344 @@ static int do_detach(int argc, char **argv)
 	return 0;
 }
 
+static int check_single_stdin(char *file_data_in, char *file_ctx_in)
+{
+	if (file_data_in && file_ctx_in &&
+	    !strcmp(file_data_in, "-") && !strcmp(file_ctx_in, "-")) {
+		p_err("cannot use standard input for both data_in and ctx_in");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int get_run_data(const char *fname, void **data_ptr, unsigned int *size)
+{
+	size_t block_size = 256;
+	size_t buf_size = block_size;
+	size_t nb_read = 0;
+	void *tmp;
+	FILE *f;
+
+	if (!fname) {
+		*data_ptr = NULL;
+		*size = 0;
+		return 0;
+	}
+
+	if (!strcmp(fname, "-"))
+		f = stdin;
+	else
+		f = fopen(fname, "r");
+	if (!f) {
+		p_err("failed to open %s: %s", fname, strerror(errno));
+		return -1;
+	}
+
+	*data_ptr = malloc(block_size);
+	if (!*data_ptr) {
+		p_err("failed to allocate memory for data_in/ctx_in: %s",
+		      strerror(errno));
+		goto err_fclose;
+	}
+
+	while ((nb_read += fread(*data_ptr + nb_read, 1, block_size, f))) {
+		if (feof(f))
+			break;
+		if (ferror(f)) {
+			p_err("failed to read data_in/ctx_in from %s: %s",
+			      fname, strerror(errno));
+			goto err_free;
+		}
+		if (nb_read > buf_size - block_size) {
+			if (buf_size == UINT32_MAX) {
+				p_err("data_in/ctx_in is too long (max: %d)",
+				      UINT32_MAX);
+				goto err_free;
+			}
+			/* No space for fread()-ing next chunk; realloc() */
+			buf_size *= 2;
+			tmp = realloc(*data_ptr, buf_size);
+			if (!tmp) {
+				p_err("failed to reallocate data_in/ctx_in: %s",
+				      strerror(errno));
+				goto err_free;
+			}
+			*data_ptr = tmp;
+		}
+	}
+	if (f != stdin)
+		fclose(f);
+
+	*size = nb_read;
+	return 0;
+
+err_free:
+	free(*data_ptr);
+	*data_ptr = NULL;
+err_fclose:
+	if (f != stdin)
+		fclose(f);
+	return -1;
+}
+
+static void hex_print(void *data, unsigned int size, FILE *f)
+{
+	size_t i, j;
+	char c;
+
+	for (i = 0; i < size; i += 16) {
+		/* Row offset */
+		fprintf(f, "%07zx\t", i);
+
+		/* Hexadecimal values */
+		for (j = i; j < i + 16 && j < size; j++)
+			fprintf(f, "%02x%s", *(uint8_t *)(data + j),
+				j % 2 ? " " : "");
+		for (; j < i + 16; j++)
+			fprintf(f, "  %s", j % 2 ? " " : "");
+
+		/* ASCII values (if relevant), '.' otherwise */
+		fprintf(f, "| ");
+		for (j = i; j < i + 16 && j < size; j++) {
+			c = *(char *)(data + j);
+			if (c < ' ' || c > '~')
+				c = '.';
+			fprintf(f, "%c%s", c, j == i + 7 ? " " : "");
+		}
+
+		fprintf(f, "\n");
+	}
+}
+
+static int
+print_run_output(void *data, unsigned int size, const char *fname,
+		 const char *json_key)
+{
+	size_t nb_written;
+	FILE *f;
+
+	if (!fname)
+		return 0;
+
+	if (!strcmp(fname, "-")) {
+		f = stdout;
+		if (json_output) {
+			jsonw_name(json_wtr, json_key);
+			print_data_json(data, size);
+		} else {
+			hex_print(data, size, f);
+		}
+		return 0;
+	}
+
+	f = fopen(fname, "w");
+	if (!f) {
+		p_err("failed to open %s: %s", fname, strerror(errno));
+		return -1;
+	}
+
+	nb_written = fwrite(data, 1, size, f);
+	fclose(f);
+	if (nb_written != size) {
+		p_err("failed to write output data/ctx: %s", strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int alloc_run_data(void **data_ptr, unsigned int size_out)
+{
+	*data_ptr = calloc(size_out, 1);
+	if (!*data_ptr) {
+		p_err("failed to allocate memory for output data/ctx: %s",
+		      strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+static int do_run(int argc, char **argv)
+{
+	char *data_fname_in = NULL, *data_fname_out = NULL;
+	char *ctx_fname_in = NULL, *ctx_fname_out = NULL;
+	struct bpf_prog_test_run_attr test_attr = {0};
+	const unsigned int default_size = SZ_32K;
+	void *data_in = NULL, *data_out = NULL;
+	void *ctx_in = NULL, *ctx_out = NULL;
+	unsigned int repeat = 1;
+	int fd, err;
+
+	if (!REQ_ARGS(4))
+		return -1;
+
+	fd = prog_parse_fd(&argc, &argv);
+	if (fd < 0)
+		return -1;
+
+	while (argc) {
+		if (detect_common_prefix(*argv, "data_in", "data_out",
+					 "data_size_out", NULL))
+			return -1;
+		if (detect_common_prefix(*argv, "ctx_in", "ctx_out",
+					 "ctx_size_out", NULL))
+			return -1;
+
+		if (is_prefix(*argv, "data_in")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			data_fname_in = GET_ARG();
+			if (check_single_stdin(data_fname_in, ctx_fname_in))
+				return -1;
+		} else if (is_prefix(*argv, "data_out")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			data_fname_out = GET_ARG();
+		} else if (is_prefix(*argv, "data_size_out")) {
+			char *endptr;
+
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			test_attr.data_size_out = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as output data size",
+				      *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "ctx_in")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			ctx_fname_in = GET_ARG();
+			if (check_single_stdin(data_fname_in, ctx_fname_in))
+				return -1;
+		} else if (is_prefix(*argv, "ctx_out")) {
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			ctx_fname_out = GET_ARG();
+		} else if (is_prefix(*argv, "ctx_size_out")) {
+			char *endptr;
+
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			test_attr.ctx_size_out = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as output context size",
+				      *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else if (is_prefix(*argv, "repeat")) {
+			char *endptr;
+
+			NEXT_ARG();
+			if (!REQ_ARGS(1))
+				return -1;
+
+			repeat = strtoul(*argv, &endptr, 0);
+			if (*endptr) {
+				p_err("can't parse %s as repeat number",
+				      *argv);
+				return -1;
+			}
+			NEXT_ARG();
+		} else {
+			p_err("expected no more arguments, 'data_in', 'data_out', 'data_size_out', 'ctx_in', 'ctx_out', 'ctx_size_out' or 'repeat', got: '%s'?",
+			      *argv);
+			return -1;
+		}
+	}
+
+	err = get_run_data(data_fname_in, &data_in, &test_attr.data_size_in);
+	if (err)
+		return -1;
+
+	if (data_in) {
+		if (!test_attr.data_size_out)
+			test_attr.data_size_out = default_size;
+		err = alloc_run_data(&data_out, test_attr.data_size_out);
+		if (err)
+			goto free_data_in;
+	}
+
+	err = get_run_data(ctx_fname_in, &ctx_in, &test_attr.ctx_size_in);
+	if (err)
+		goto free_data_out;
+
+	if (ctx_in) {
+		if (!test_attr.ctx_size_out)
+			test_attr.ctx_size_out = default_size;
+		err = alloc_run_data(&ctx_out, test_attr.ctx_size_out);
+		if (err)
+			goto free_ctx_in;
+	}
+
+	test_attr.prog_fd	= fd;
+	test_attr.repeat	= repeat;
+	test_attr.data_in	= data_in;
+	test_attr.data_out	= data_out;
+	test_attr.ctx_in	= ctx_in;
+	test_attr.ctx_out	= ctx_out;
+
+	err = bpf_prog_test_run_xattr(&test_attr);
+	if (err) {
+		p_err("failed to run program: %s", strerror(errno));
+		goto free_ctx_out;
+	}
+
+	err = 0;
+
+	if (json_output)
+		jsonw_start_object(json_wtr);	/* root */
+
+	/* Do not exit on errors occurring when printing output data/context,
+	 * we still want to print return value and duration for program run.
+	 */
+	if (test_attr.data_size_out)
+		err += print_run_output(test_attr.data_out,
+					test_attr.data_size_out,
+					data_fname_out, "data_out");
+	if (test_attr.ctx_size_out)
+		err += print_run_output(test_attr.ctx_out,
+					test_attr.ctx_size_out,
+					ctx_fname_out, "ctx_out");
+
+	if (json_output) {
+		jsonw_uint_field(json_wtr, "retval", test_attr.retval);
+		jsonw_uint_field(json_wtr, "duration", test_attr.duration);
+		jsonw_end_object(json_wtr);	/* root */
+	} else {
+		fprintf(stdout, "Return value: %u, duration%s: %uns\n",
+			test_attr.retval,
+			repeat > 1 ? " (average)" : "", test_attr.duration);
+	}
+
+free_ctx_out:
+	free(ctx_out);
+free_ctx_in:
+	free(ctx_in);
+free_data_out:
+	free(data_out);
+free_data_in:
+	free(data_in);
+
+	return err;
+}
+
 static int load_with_options(int argc, char **argv, bool first_prog_only)
 {
 	struct bpf_object_load_attr load_attr = { 0 };
@@ -1058,6 +1397,11 @@ static int do_help(int argc, char **argv)
 		"                         [pinmaps MAP_DIR]\n"
 		"       %s %s attach PROG ATTACH_TYPE [MAP]\n"
 		"       %s %s detach PROG ATTACH_TYPE [MAP]\n"
+		"       %s %s run PROG \\\n"
+		"                         data_in FILE \\\n"
+		"                         [data_out FILE [data_size_out L]] \\\n"
+		"                         [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
+		"                         [repeat N]\n"
 		"       %s %s tracelog\n"
 		"       %s %s help\n"
 		"\n"
@@ -1079,7 +1423,8 @@ static int do_help(int argc, char **argv)
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		bin_name, argv[-2]);
 
 	return 0;
 }
@@ -1095,6 +1440,7 @@ static const struct cmd cmds[] = {
 	{ "attach",	do_attach },
 	{ "detach",	do_detach },
 	{ "tracelog",	do_tracelog },
+	{ "run",	do_run },
 	{ 0 }
 };
 
diff --git a/tools/include/linux/sizes.h b/tools/include/linux/sizes.h
new file mode 100644
index 000000000000..1cbb4c4d016e
--- /dev/null
+++ b/tools/include/linux/sizes.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/sizes.h
+ */
+#ifndef __LINUX_SIZES_H__
+#define __LINUX_SIZES_H__
+
+#include <linux/const.h>
+
+#define SZ_1				0x00000001
+#define SZ_2				0x00000002
+#define SZ_4				0x00000004
+#define SZ_8				0x00000008
+#define SZ_16				0x00000010
+#define SZ_32				0x00000020
+#define SZ_64				0x00000040
+#define SZ_128				0x00000080
+#define SZ_256				0x00000100
+#define SZ_512				0x00000200
+
+#define SZ_1K				0x00000400
+#define SZ_2K				0x00000800
+#define SZ_4K				0x00001000
+#define SZ_8K				0x00002000
+#define SZ_16K				0x00004000
+#define SZ_32K				0x00008000
+#define SZ_64K				0x00010000
+#define SZ_128K				0x00020000
+#define SZ_256K				0x00040000
+#define SZ_512K				0x00080000
+
+#define SZ_1M				0x00100000
+#define SZ_2M				0x00200000
+#define SZ_4M				0x00400000
+#define SZ_8M				0x00800000
+#define SZ_16M				0x01000000
+#define SZ_32M				0x02000000
+#define SZ_64M				0x04000000
+#define SZ_128M				0x08000000
+#define SZ_256M				0x10000000
+#define SZ_512M				0x20000000
+
+#define SZ_1G				0x40000000
+#define SZ_2G				0x80000000
+
+#define SZ_4G				_AC(0x100000000, ULL)
+
+#endif /* __LINUX_SIZES_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From aa52bcbe0e72fac36b1862db08b9c09c4caefae3 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 5 Jul 2019 14:10:31 +0200
Subject: tools: bpftool: Fix json dump crash on powerpc

Michael reported crash with by bpf program in json mode on powerpc:

  # bpftool prog -p dump jited id 14
  [{
        "name": "0xd00000000a9aa760",
        "insns": [{
                "pc": "0x0",
                "operation": "nop",
                "operands": [null
                ]
            },{
                "pc": "0x4",
                "operation": "nop",
                "operands": [null
                ]
            },{
                "pc": "0x8",
                "operation": "mflr",
  Segmentation fault (core dumped)

The code is assuming char pointers in format, which is not always
true at least for powerpc. Fixing this by dumping the whole string
into buffer based on its format.

Please note that libopcodes code does not check return values from
fprintf callback, but as per Jakub suggestion returning -1 on allocation
failure so we do the best effort to propagate the error.

Fixes: 107f041212c1 ("tools: bpftool: add JSON output for `bpftool prog dump jited *` command")
Reported-by: Michael Petlan <mpetlan@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/jit_disasm.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 3ef3093560ba..bfed711258ce 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -11,6 +11,8 @@
  * Licensed under the GNU General Public License, version 2.0 (GPLv2)
  */
 
+#define _GNU_SOURCE
+#include <stdio.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -44,11 +46,13 @@ static int fprintf_json(void *out, const char *fmt, ...)
 	char *s;
 
 	va_start(ap, fmt);
+	if (vasprintf(&s, fmt, ap) < 0)
+		return -1;
+	va_end(ap);
+
 	if (!oper_count) {
 		int i;
 
-		s = va_arg(ap, char *);
-
 		/* Strip trailing spaces */
 		i = strlen(s) - 1;
 		while (s[i] == ' ')
@@ -61,11 +65,10 @@ static int fprintf_json(void *out, const char *fmt, ...)
 	} else if (!strcmp(fmt, ",")) {
 		   /* Skip */
 	} else {
-		s = va_arg(ap, char *);
 		jsonw_string(json_wtr, s);
 		oper_count++;
 	}
-	va_end(ap);
+	free(s);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d6dbce8fdd2b71a7aafc50b2dc5a67cc449428de Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 3 Jul 2019 14:29:07 -0700
Subject: selftests/bpf: fix test_align liveliness expectations

Commit 2589726d12a1 ("bpf: introduce bounded loops") caused a change
in the way some registers liveliness is reported in the test_align.
Add missing "_w" to a couple of tests. Note, there are no offset
changes!

Fixes: 2589726d12a1 ("bpf: introduce bounded loops")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_align.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 3c789d03b629..0262f7b374f9 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -180,7 +180,7 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
+			{7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
 			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
 			{8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
 			{9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
@@ -315,7 +315,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Offset is added to packet pointer R5, resulting in
 			 * known fixed offset, and variable offset from R6.
@@ -405,7 +405,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
@@ -473,12 +473,12 @@ static struct bpf_align_test tests[] = {
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 			 * the add could overflow.
 			 */
-			{7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+			{7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
 			/* Checked s>=0 */
 			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 			/* packet pointer + nonnegative (4n+2) */
 			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-			{13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -486,7 +486,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 		}
 	},
 	{
@@ -521,7 +521,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
@@ -574,7 +574,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
 			{10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
-- 
cgit v1.2.3-59-g8ed1b


From c3ec002e633e25f6e0ce6f97346dd6f38afcbd6a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 3 Jul 2019 13:09:52 -0700
Subject: selftests/bpf: add test_tcp_rtt to .gitignore

Forgot to add it in the original patch.

Fixes: b55873984dab ("selftests/bpf: test BPF_SOCK_OPS_RTT_CB")
Reported-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index a2f7f79c7908..90f70d2c7c22 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -42,3 +42,4 @@ xdping
 test_sockopt
 test_sockopt_sk
 test_sockopt_multi
+test_tcp_rtt
-- 
cgit v1.2.3-59-g8ed1b


From 81a89ef6be5befcc5a168af1aca377670bbf4997 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 4 Jul 2019 14:50:37 -0700
Subject: selftests/tls: add test for poll() with data in TLS ULP

Add a test which checks if leftover record data in TLS
layer correctly wakes up poll().

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/tls.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 278c86134556..090fff9dbc48 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -644,6 +644,32 @@ TEST_F(tls, poll_wait)
 	EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len);
 }
 
+TEST_F(tls, poll_wait_split)
+{
+	struct pollfd fd = { 0, 0, 0 };
+	char send_mem[20] = {};
+	char recv_mem[15];
+
+	fd.fd = self->cfd;
+	fd.events = POLLIN;
+	/* Send 20 bytes */
+	EXPECT_EQ(send(self->fd, send_mem, sizeof(send_mem), 0),
+		  sizeof(send_mem));
+	/* Poll with inf. timeout */
+	EXPECT_EQ(poll(&fd, 1, -1), 1);
+	EXPECT_EQ(fd.revents & POLLIN, 1);
+	EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), MSG_WAITALL),
+		  sizeof(recv_mem));
+
+	/* Now the remaining 5 bytes of record data are in TLS ULP */
+	fd.fd = self->cfd;
+	fd.events = POLLIN;
+	EXPECT_EQ(poll(&fd, 1, -1), 1);
+	EXPECT_EQ(fd.revents & POLLIN, 1);
+	EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), 0),
+		  sizeof(send_mem) - sizeof(recv_mem));
+}
+
 TEST_F(tls, blocking)
 {
 	size_t data = 100000;
-- 
cgit v1.2.3-59-g8ed1b


From fb84b8224655309e7d38f989e426b82a4543f115 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 6 Jul 2019 11:06:24 -0700
Subject: libbpf: add perf buffer API

BPF_MAP_TYPE_PERF_EVENT_ARRAY map is often used to send data from BPF program
to user space for additional processing. libbpf already has very low-level API
to read single CPU perf buffer, bpf_perf_event_read_simple(), but it's hard to
use and requires a lot of code to set everything up. This patch adds
perf_buffer abstraction on top of it, abstracting setting up and polling
per-CPU logic into simple and convenient API, similar to what BCC provides.

perf_buffer__new() sets up per-CPU ring buffers and updates corresponding BPF
map entries. It accepts two user-provided callbacks: one for handling raw
samples and one for get notifications of lost samples due to buffer overflow.

perf_buffer__new_raw() is similar, but provides more control over how
perf events are set up (by accepting user-provided perf_event_attr), how
they are handled (perf_event_header pointer is passed directly to
user-provided callback), and on which CPUs ring buffers are created
(it's possible to provide a list of CPUs and corresponding map keys to
update). This API allows advanced users fuller control.

perf_buffer__poll() is used to fetch ring buffer data across all CPUs,
utilizing epoll instance.

perf_buffer__free() does corresponding clean up and unsets FDs from BPF map.

All APIs are not thread-safe. User should ensure proper locking/coordination if
used in multi-threaded set up.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c   | 366 +++++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  49 +++++++
 tools/lib/bpf/libbpf.map |   4 +
 3 files changed, 419 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2a08eb106221..ae569b50e2e0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -32,7 +32,9 @@
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
+#include <sys/epoll.h>
 #include <sys/ioctl.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/vfs.h>
@@ -4354,6 +4356,370 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 	return ret;
 }
 
+struct perf_buffer;
+
+struct perf_buffer_params {
+	struct perf_event_attr *attr;
+	/* if event_cb is specified, it takes precendence */
+	perf_buffer_event_fn event_cb;
+	/* sample_cb and lost_cb are higher-level common-case callbacks */
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx;
+	int cpu_cnt;
+	int *cpus;
+	int *map_keys;
+};
+
+struct perf_cpu_buf {
+	struct perf_buffer *pb;
+	void *base; /* mmap()'ed memory */
+	void *buf; /* for reconstructing segmented data */
+	size_t buf_size;
+	int fd;
+	int cpu;
+	int map_key;
+};
+
+struct perf_buffer {
+	perf_buffer_event_fn event_cb;
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx; /* passed into callbacks */
+
+	size_t page_size;
+	size_t mmap_size;
+	struct perf_cpu_buf **cpu_bufs;
+	struct epoll_event *events;
+	int cpu_cnt;
+	int epoll_fd; /* perf event FD */
+	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
+				      struct perf_cpu_buf *cpu_buf)
+{
+	if (!cpu_buf)
+		return;
+	if (cpu_buf->base &&
+	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
+		pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
+	if (cpu_buf->fd >= 0) {
+		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
+		close(cpu_buf->fd);
+	}
+	free(cpu_buf->buf);
+	free(cpu_buf);
+}
+
+void perf_buffer__free(struct perf_buffer *pb)
+{
+	int i;
+
+	if (!pb)
+		return;
+	if (pb->cpu_bufs) {
+		for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
+			perf_buffer__free_cpu_buf(pb, cpu_buf);
+		}
+		free(pb->cpu_bufs);
+	}
+	if (pb->epoll_fd >= 0)
+		close(pb->epoll_fd);
+	free(pb->events);
+	free(pb);
+}
+
+static struct perf_cpu_buf *
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
+			  int cpu, int map_key)
+{
+	struct perf_cpu_buf *cpu_buf;
+	char msg[STRERR_BUFSIZE];
+	int err;
+
+	cpu_buf = calloc(1, sizeof(*cpu_buf));
+	if (!cpu_buf)
+		return ERR_PTR(-ENOMEM);
+
+	cpu_buf->pb = pb;
+	cpu_buf->cpu = cpu;
+	cpu_buf->map_key = map_key;
+
+	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
+			      -1, PERF_FLAG_FD_CLOEXEC);
+	if (cpu_buf->fd < 0) {
+		err = -errno;
+		pr_warning("failed to open perf buffer event on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
+			     PROT_READ | PROT_WRITE, MAP_SHARED,
+			     cpu_buf->fd, 0);
+	if (cpu_buf->base == MAP_FAILED) {
+		cpu_buf->base = NULL;
+		err = -errno;
+		pr_warning("failed to mmap perf buffer on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		pr_warning("failed to enable perf buffer event on cpu #%d: %s\n",
+			   cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	return cpu_buf;
+
+error:
+	perf_buffer__free_cpu_buf(pb, cpu_buf);
+	return (struct perf_cpu_buf *)ERR_PTR(err);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p);
+
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+				     const struct perf_buffer_opts *opts)
+{
+	struct perf_buffer_params p = {};
+	struct perf_event_attr attr = {
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.type = PERF_TYPE_SOFTWARE,
+		.sample_type = PERF_SAMPLE_RAW,
+		.sample_period = 1,
+		.wakeup_events = 1,
+	};
+
+	p.attr = &attr;
+	p.sample_cb = opts ? opts->sample_cb : NULL;
+	p.lost_cb = opts ? opts->lost_cb : NULL;
+	p.ctx = opts ? opts->ctx : NULL;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts)
+{
+	struct perf_buffer_params p = {};
+
+	p.attr = opts->attr;
+	p.event_cb = opts->event_cb;
+	p.ctx = opts->ctx;
+	p.cpu_cnt = opts->cpu_cnt;
+	p.cpus = opts->cpus;
+	p.map_keys = opts->map_keys;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p)
+{
+	struct bpf_map_info map = {};
+	char msg[STRERR_BUFSIZE];
+	struct perf_buffer *pb;
+	__u32 map_info_len;
+	int err, i;
+
+	if (page_cnt & (page_cnt - 1)) {
+		pr_warning("page count should be power of two, but is %zu\n",
+			   page_cnt);
+		return ERR_PTR(-EINVAL);
+	}
+
+	map_info_len = sizeof(map);
+	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
+	if (err) {
+		err = -errno;
+		pr_warning("failed to get map info for map FD %d: %s\n",
+			   map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+		return ERR_PTR(err);
+	}
+
+	if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			   map.name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	pb = calloc(1, sizeof(*pb));
+	if (!pb)
+		return ERR_PTR(-ENOMEM);
+
+	pb->event_cb = p->event_cb;
+	pb->sample_cb = p->sample_cb;
+	pb->lost_cb = p->lost_cb;
+	pb->ctx = p->ctx;
+
+	pb->page_size = getpagesize();
+	pb->mmap_size = pb->page_size * page_cnt;
+	pb->map_fd = map_fd;
+
+	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (pb->epoll_fd < 0) {
+		err = -errno;
+		pr_warning("failed to create epoll instance: %s\n",
+			   libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (p->cpu_cnt > 0) {
+		pb->cpu_cnt = p->cpu_cnt;
+	} else {
+		pb->cpu_cnt = libbpf_num_possible_cpus();
+		if (pb->cpu_cnt < 0) {
+			err = pb->cpu_cnt;
+			goto error;
+		}
+		if (map.max_entries < pb->cpu_cnt)
+			pb->cpu_cnt = map.max_entries;
+	}
+
+	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
+	if (!pb->events) {
+		err = -ENOMEM;
+		pr_warning("failed to allocate events: out of memory\n");
+		goto error;
+	}
+	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
+	if (!pb->cpu_bufs) {
+		err = -ENOMEM;
+		pr_warning("failed to allocate buffers: out of memory\n");
+		goto error;
+	}
+
+	for (i = 0; i < pb->cpu_cnt; i++) {
+		struct perf_cpu_buf *cpu_buf;
+		int cpu, map_key;
+
+		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
+		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+
+		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
+		if (IS_ERR(cpu_buf)) {
+			err = PTR_ERR(cpu_buf);
+			goto error;
+		}
+
+		pb->cpu_bufs[i] = cpu_buf;
+
+		err = bpf_map_update_elem(pb->map_fd, &map_key,
+					  &cpu_buf->fd, 0);
+		if (err) {
+			err = -errno;
+			pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
+				   cpu, map_key, cpu_buf->fd,
+				   libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+
+		pb->events[i].events = EPOLLIN;
+		pb->events[i].data.ptr = cpu_buf;
+		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
+			      &pb->events[i]) < 0) {
+			err = -errno;
+			pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
+				   cpu, cpu_buf->fd,
+				   libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+	}
+
+	return pb;
+
+error:
+	if (pb)
+		perf_buffer__free(pb);
+	return ERR_PTR(err);
+}
+
+struct perf_sample_raw {
+	struct perf_event_header header;
+	uint32_t size;
+	char data[0];
+};
+
+struct perf_sample_lost {
+	struct perf_event_header header;
+	uint64_t id;
+	uint64_t lost;
+	uint64_t sample_id;
+};
+
+static enum bpf_perf_event_ret
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
+{
+	struct perf_cpu_buf *cpu_buf = ctx;
+	struct perf_buffer *pb = cpu_buf->pb;
+	void *data = e;
+
+	/* user wants full control over parsing perf event */
+	if (pb->event_cb)
+		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
+
+	switch (e->type) {
+	case PERF_RECORD_SAMPLE: {
+		struct perf_sample_raw *s = data;
+
+		if (pb->sample_cb)
+			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
+		break;
+	}
+	case PERF_RECORD_LOST: {
+		struct perf_sample_lost *s = data;
+
+		if (pb->lost_cb)
+			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
+		break;
+	}
+	default:
+		pr_warning("unknown perf sample type %d\n", e->type);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static int perf_buffer__process_records(struct perf_buffer *pb,
+					struct perf_cpu_buf *cpu_buf)
+{
+	enum bpf_perf_event_ret ret;
+
+	ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+					 pb->page_size, &cpu_buf->buf,
+					 &cpu_buf->buf_size,
+					 perf_buffer__process_record, cpu_buf);
+	if (ret != LIBBPF_PERF_EVENT_CONT)
+		return ret;
+	return 0;
+}
+
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
+{
+	int i, cnt, err;
+
+	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+	for (i = 0; i < cnt; i++) {
+		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
+
+		err = perf_buffer__process_records(pb, cpu_buf);
+		if (err) {
+			pr_warning("error while processing records: %d\n", err);
+			return err;
+		}
+	}
+	return cnt < 0 ? -errno : cnt;
+}
+
 struct bpf_prog_info_array_desc {
 	int	array_offset;	/* e.g. offset of jited_prog_insns */
 	int	count_offset;	/* e.g. offset of jited_prog_len */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f55933784f95..5cbf459ece0b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -358,6 +358,26 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
 
+struct perf_buffer;
+
+typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
+				      void *data, __u32 size);
+typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
+
+/* common use perf buffer options */
+struct perf_buffer_opts {
+	/* if specified, sample_cb is called for each sample */
+	perf_buffer_sample_fn sample_cb;
+	/* if specified, lost_cb is called for each batch of lost samples */
+	perf_buffer_lost_fn lost_cb;
+	/* ctx is provided to sample_cb and lost_cb */
+	void *ctx;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new(int map_fd, size_t page_cnt,
+		 const struct perf_buffer_opts *opts);
+
 enum bpf_perf_event_ret {
 	LIBBPF_PERF_EVENT_DONE	= 0,
 	LIBBPF_PERF_EVENT_ERROR	= -1,
@@ -365,6 +385,35 @@ enum bpf_perf_event_ret {
 };
 
 struct perf_event_header;
+
+typedef enum bpf_perf_event_ret
+(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event);
+
+/* raw perf buffer options, giving most power and control */
+struct perf_buffer_raw_opts {
+	/* perf event attrs passed directly into perf_event_open() */
+	struct perf_event_attr *attr;
+	/* raw event callback */
+	perf_buffer_event_fn event_cb;
+	/* ctx is provided to event_cb */
+	void *ctx;
+	/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
+	 * max_entries of given PERF_EVENT_ARRAY map)
+	 */
+	int cpu_cnt;
+	/* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */
+	int *cpus;
+	/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
+	int *map_keys;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+
 typedef enum bpf_perf_event_ret
 	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
 				  void *private_data);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index e6b7d4edbc93..f9d316e873d8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -179,4 +179,8 @@ LIBBPF_0.0.4 {
 		btf_dump__new;
 		btf__parse_elf;
 		libbpf_num_possible_cpus;
+		perf_buffer__free;
+		perf_buffer__new;
+		perf_buffer__new_raw;
+		perf_buffer__poll;
 } LIBBPF_0.0.3;
-- 
cgit v1.2.3-59-g8ed1b


From d7ff34d5fb5a2f6722786863c4afa79c51f8edf2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 6 Jul 2019 11:06:25 -0700
Subject: libbpf: auto-set PERF_EVENT_ARRAY size to number of CPUs

For BPF_MAP_TYPE_PERF_EVENT_ARRAY typically correct size is number of
possible CPUs. This is impossible to specify at compilation time. This
change adds automatic setting of PERF_EVENT_ARRAY size to number of
system CPUs, unless non-zero size is specified explicitly. This allows
to adjust size for advanced specific cases, while providing convenient
and logical defaults.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ae569b50e2e0..ed07789b3e62 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2116,6 +2116,7 @@ static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
 	struct bpf_create_map_attr create_attr = {};
+	int nr_cpus = 0;
 	unsigned int i;
 	int err;
 
@@ -2138,7 +2139,22 @@ bpf_object__create_maps(struct bpf_object *obj)
 		create_attr.map_flags = def->map_flags;
 		create_attr.key_size = def->key_size;
 		create_attr.value_size = def->value_size;
-		create_attr.max_entries = def->max_entries;
+		if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+		    !def->max_entries) {
+			if (!nr_cpus)
+				nr_cpus = libbpf_num_possible_cpus();
+			if (nr_cpus < 0) {
+				pr_warning("failed to determine number of system CPUs: %d\n",
+					   nr_cpus);
+				err = nr_cpus;
+				goto err_out;
+			}
+			pr_debug("map '%s': setting size to %d\n",
+				 map->name, nr_cpus);
+			create_attr.max_entries = nr_cpus;
+		} else {
+			create_attr.max_entries = def->max_entries;
+		}
 		create_attr.btf_fd = 0;
 		create_attr.btf_key_type_id = 0;
 		create_attr.btf_value_type_id = 0;
@@ -2155,9 +2171,10 @@ bpf_object__create_maps(struct bpf_object *obj)
 		*pfd = bpf_create_map_xattr(&create_attr);
 		if (*pfd < 0 && (create_attr.btf_key_type_id ||
 				 create_attr.btf_value_type_id)) {
-			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
-				   map->name, cp, errno);
+				   map->name, cp, err);
 			create_attr.btf_fd = 0;
 			create_attr.btf_key_type_id = 0;
 			create_attr.btf_value_type_id = 0;
@@ -2169,11 +2186,11 @@ bpf_object__create_maps(struct bpf_object *obj)
 		if (*pfd < 0) {
 			size_t j;
 
-			err = *pfd;
+			err = -errno;
 err_out:
-			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-			pr_warning("failed to create map (name: '%s'): %s\n",
-				   map->name, cp);
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+			pr_warning("failed to create map (name: '%s'): %s(%d)\n",
+				   map->name, cp, err);
 			for (j = 0; j < i; j++)
 				zclose(obj->maps[j].fd);
 			return err;
-- 
cgit v1.2.3-59-g8ed1b


From ee5cf82ce04a60d71b0dc219804f6cf1e6ba6d14 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 6 Jul 2019 11:06:26 -0700
Subject: selftests/bpf: test perf buffer API

Add test verifying perf buffer API functionality.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 .../testing/selftests/bpf/prog_tests/perf_buffer.c | 100 +++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_perf_buffer.c |  25 ++++++
 2 files changed, 125 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/perf_buffer.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_perf_buffer.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
new file mode 100644
index 000000000000..3f1ef95865ff
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+#define SYS_KPROBE_NAME "__x64_sys_nanosleep"
+#else
+#define SYS_KPROBE_NAME "sys_nanosleep"
+#endif
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+	int cpu_data = *(int *)data, duration = 0;
+	cpu_set_t *cpu_seen = ctx;
+
+	if (cpu_data != cpu)
+		CHECK(cpu_data != cpu, "check_cpu_data",
+		      "cpu_data %d != cpu %d\n", cpu_data, cpu);
+
+	CPU_SET(cpu, cpu_seen);
+}
+
+void test_perf_buffer(void)
+{
+	int err, prog_fd, nr_cpus, i, duration = 0;
+	const char *prog_name = "kprobe/sys_nanosleep";
+	const char *file = "./test_perf_buffer.o";
+	struct perf_buffer_opts pb_opts = {};
+	struct bpf_map *perf_buf_map;
+	cpu_set_t cpu_set, cpu_seen;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	struct perf_buffer *pb;
+	struct bpf_link *link;
+
+	nr_cpus = libbpf_num_possible_cpus();
+	if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
+		return;
+
+	/* load program */
+	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
+	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+		return;
+
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
+		goto out_close;
+
+	/* load map */
+	perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
+	if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+		goto out_close;
+
+	/* attach kprobe */
+	link = bpf_program__attach_kprobe(prog, false /* retprobe */,
+					  SYS_KPROBE_NAME);
+	if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+		goto out_close;
+
+	/* set up perf buffer */
+	pb_opts.sample_cb = on_sample;
+	pb_opts.ctx = &cpu_seen;
+	pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
+	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+		goto out_detach;
+
+	/* trigger kprobe on every CPU */
+	CPU_ZERO(&cpu_seen);
+	for (i = 0; i < nr_cpus; i++) {
+		CPU_ZERO(&cpu_set);
+		CPU_SET(i, &cpu_set);
+
+		err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
+					     &cpu_set);
+		if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
+				 i, err))
+			goto out_detach;
+
+		usleep(1);
+	}
+
+	/* read perf buffer */
+	err = perf_buffer__poll(pb, 100);
+	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+		goto out_free_pb;
+
+	if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
+		  "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
+		goto out_free_pb;
+
+out_free_pb:
+	perf_buffer__free(pb);
+out_detach:
+	bpf_link__destroy(link);
+out_close:
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
new file mode 100644
index 000000000000..876c27deb65a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
+SEC("kprobe/sys_nanosleep")
+int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+{
+	int cpu = bpf_get_smp_processor_id();
+
+	bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
+			      &cpu, sizeof(cpu));
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1;
-- 
cgit v1.2.3-59-g8ed1b


From 9b190f185d2f6d65b4c16074db0c4eafa32c7ae1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 6 Jul 2019 11:06:27 -0700
Subject: tools/bpftool: switch map event_pipe to libbpf's perf_buffer

Switch event_pipe implementation to rely on new libbpf perf buffer API
(it's raw low-level variant).

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map_perf_ring.c | 201 ++++++++++++--------------------------
 1 file changed, 64 insertions(+), 137 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 0507dfaf7a8f..3f108ab17797 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -28,7 +28,7 @@
 
 #define MMAP_PAGE_CNT	16
 
-static bool stop;
+static volatile bool stop;
 
 struct event_ring_info {
 	int fd;
@@ -44,32 +44,44 @@ struct perf_event_sample {
 	unsigned char data[];
 };
 
+struct perf_event_lost {
+	struct perf_event_header header;
+	__u64 id;
+	__u64 lost;
+};
+
 static void int_exit(int signo)
 {
 	fprintf(stderr, "Stopping...\n");
 	stop = true;
 }
 
+struct event_pipe_ctx {
+	bool all_cpus;
+	int cpu;
+	int idx;
+};
+
 static enum bpf_perf_event_ret
-print_bpf_output(struct perf_event_header *event, void *private_data)
+print_bpf_output(void *private_data, int cpu, struct perf_event_header *event)
 {
-	struct perf_event_sample *e = container_of(event, struct perf_event_sample,
+	struct perf_event_sample *e = container_of(event,
+						   struct perf_event_sample,
 						   header);
-	struct event_ring_info *ring = private_data;
-	struct {
-		struct perf_event_header header;
-		__u64 id;
-		__u64 lost;
-	} *lost = (typeof(lost))event;
+	struct perf_event_lost *lost = container_of(event,
+						    struct perf_event_lost,
+						    header);
+	struct event_pipe_ctx *ctx = private_data;
+	int idx = ctx->all_cpus ? cpu : ctx->idx;
 
 	if (json_output) {
 		jsonw_start_object(json_wtr);
 		jsonw_name(json_wtr, "type");
 		jsonw_uint(json_wtr, e->header.type);
 		jsonw_name(json_wtr, "cpu");
-		jsonw_uint(json_wtr, ring->cpu);
+		jsonw_uint(json_wtr, cpu);
 		jsonw_name(json_wtr, "index");
-		jsonw_uint(json_wtr, ring->key);
+		jsonw_uint(json_wtr, idx);
 		if (e->header.type == PERF_RECORD_SAMPLE) {
 			jsonw_name(json_wtr, "timestamp");
 			jsonw_uint(json_wtr, e->time);
@@ -89,7 +101,7 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 		if (e->header.type == PERF_RECORD_SAMPLE) {
 			printf("== @%lld.%09lld CPU: %d index: %d =====\n",
 			       e->time / 1000000000ULL, e->time % 1000000000ULL,
-			       ring->cpu, ring->key);
+			       cpu, idx);
 			fprint_hex(stdout, e->data, e->size, " ");
 			printf("\n");
 		} else if (e->header.type == PERF_RECORD_LOST) {
@@ -103,87 +115,25 @@ print_bpf_output(struct perf_event_header *event, void *private_data)
 	return LIBBPF_PERF_EVENT_CONT;
 }
 
-static void
-perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
-{
-	enum bpf_perf_event_ret ret;
-
-	ret = bpf_perf_event_read_simple(ring->mem,
-					 MMAP_PAGE_CNT * get_page_size(),
-					 get_page_size(), buf, buf_len,
-					 print_bpf_output, ring);
-	if (ret != LIBBPF_PERF_EVENT_CONT) {
-		fprintf(stderr, "perf read loop failed with %d\n", ret);
-		stop = true;
-	}
-}
-
-static int perf_mmap_size(void)
-{
-	return get_page_size() * (MMAP_PAGE_CNT + 1);
-}
-
-static void *perf_event_mmap(int fd)
-{
-	int mmap_size = perf_mmap_size();
-	void *base;
-
-	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	if (base == MAP_FAILED) {
-		p_err("event mmap failed: %s\n", strerror(errno));
-		return NULL;
-	}
-
-	return base;
-}
-
-static void perf_event_unmap(void *mem)
-{
-	if (munmap(mem, perf_mmap_size()))
-		fprintf(stderr, "Can't unmap ring memory!\n");
-}
-
-static int bpf_perf_event_open(int map_fd, int key, int cpu)
+int do_event_pipe(int argc, char **argv)
 {
-	struct perf_event_attr attr = {
+	struct perf_event_attr perf_attr = {
 		.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
 		.type = PERF_TYPE_SOFTWARE,
 		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.sample_period = 1,
+		.wakeup_events = 1,
 	};
-	int pmu_fd;
-
-	pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
-	if (pmu_fd < 0) {
-		p_err("failed to open perf event %d for CPU %d", key, cpu);
-		return -1;
-	}
-
-	if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
-		p_err("failed to update map for event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-	if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
-		p_err("failed to enable event %d for CPU %d", key, cpu);
-		goto err_close;
-	}
-
-	return pmu_fd;
-
-err_close:
-	close(pmu_fd);
-	return -1;
-}
-
-int do_event_pipe(int argc, char **argv)
-{
-	int i, nfds, map_fd, index = -1, cpu = -1;
 	struct bpf_map_info map_info = {};
-	struct event_ring_info *rings;
-	size_t tmp_buf_sz = 0;
-	void *tmp_buf = NULL;
-	struct pollfd *pfds;
+	struct perf_buffer_raw_opts opts = {};
+	struct event_pipe_ctx ctx = {
+		.all_cpus = true,
+		.cpu = -1,
+		.idx = -1,
+	};
+	struct perf_buffer *pb;
 	__u32 map_info_len;
-	bool do_all = true;
+	int err, map_fd;
 
 	map_info_len = sizeof(map_info);
 	map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
@@ -205,7 +155,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			cpu = strtoul(*argv, &endptr, 0);
+			ctx.cpu = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as CPU ID", **argv);
 				goto err_close_map;
@@ -216,7 +166,7 @@ int do_event_pipe(int argc, char **argv)
 			char *endptr;
 
 			NEXT_ARG();
-			index = strtoul(*argv, &endptr, 0);
+			ctx.idx = strtoul(*argv, &endptr, 0);
 			if (*endptr) {
 				p_err("can't parse %s as index", **argv);
 				goto err_close_map;
@@ -228,45 +178,32 @@ int do_event_pipe(int argc, char **argv)
 			goto err_close_map;
 		}
 
-		do_all = false;
+		ctx.all_cpus = false;
 	}
 
-	if (!do_all) {
-		if (index == -1 || cpu == -1) {
+	if (!ctx.all_cpus) {
+		if (ctx.idx == -1 || ctx.cpu == -1) {
 			p_err("cpu and index must be specified together");
 			goto err_close_map;
 		}
-
-		nfds = 1;
 	} else {
-		nfds = min(get_possible_cpus(), map_info.max_entries);
-		cpu = 0;
-		index = 0;
+		ctx.cpu = 0;
+		ctx.idx = 0;
 	}
 
-	rings = calloc(nfds, sizeof(rings[0]));
-	if (!rings)
+	opts.attr = &perf_attr;
+	opts.event_cb = print_bpf_output;
+	opts.ctx = &ctx;
+	opts.cpu_cnt = ctx.all_cpus ? 0 : 1;
+	opts.cpus = &ctx.cpu;
+	opts.map_keys = &ctx.idx;
+
+	pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts);
+	err = libbpf_get_error(pb);
+	if (err) {
+		p_err("failed to create perf buffer: %s (%d)",
+		      strerror(err), err);
 		goto err_close_map;
-
-	pfds = calloc(nfds, sizeof(pfds[0]));
-	if (!pfds)
-		goto err_free_rings;
-
-	for (i = 0; i < nfds; i++) {
-		rings[i].cpu = cpu + i;
-		rings[i].key = index + i;
-
-		rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
-						  rings[i].cpu);
-		if (rings[i].fd < 0)
-			goto err_close_fds_prev;
-
-		rings[i].mem = perf_event_mmap(rings[i].fd);
-		if (!rings[i].mem)
-			goto err_close_fds_current;
-
-		pfds[i].fd = rings[i].fd;
-		pfds[i].events = POLLIN;
 	}
 
 	signal(SIGINT, int_exit);
@@ -277,34 +214,24 @@ int do_event_pipe(int argc, char **argv)
 		jsonw_start_array(json_wtr);
 
 	while (!stop) {
-		poll(pfds, nfds, 200);
-		for (i = 0; i < nfds; i++)
-			perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+		err = perf_buffer__poll(pb, 200);
+		if (err < 0 && err != -EINTR) {
+			p_err("perf buffer polling failed: %s (%d)",
+			      strerror(err), err);
+			goto err_close_pb;
+		}
 	}
-	free(tmp_buf);
 
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
-	for (i = 0; i < nfds; i++) {
-		perf_event_unmap(rings[i].mem);
-		close(rings[i].fd);
-	}
-	free(pfds);
-	free(rings);
+	perf_buffer__free(pb);
 	close(map_fd);
 
 	return 0;
 
-err_close_fds_prev:
-	while (i--) {
-		perf_event_unmap(rings[i].mem);
-err_close_fds_current:
-		close(rings[i].fd);
-	}
-	free(pfds);
-err_free_rings:
-	free(rings);
+err_close_pb:
+	perf_buffer__free(pb);
 err_close_map:
 	close(map_fd);
 	return -1;
-- 
cgit v1.2.3-59-g8ed1b


From cd07a95f9b4df63ac130b4ee0cf87beca12412ca Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Sat, 6 Jul 2019 11:06:28 -0700
Subject: libbpf: add perf_buffer_ prefix to README

perf_buffer "object" is part of libbpf API now, add it to the list of
libbpf function prefixes.

Suggested-by: Daniel Borkman <daniel@iogearbox.net>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/README.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
index cef7b77eab69..8928f7787f2d 100644
--- a/tools/lib/bpf/README.rst
+++ b/tools/lib/bpf/README.rst
@@ -9,7 +9,8 @@ described here. It's recommended to follow these conventions whenever a
 new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
+``perf_buffer_``.
 
 System call wrappers
 --------------------
-- 
cgit v1.2.3-59-g8ed1b


From 4cfacbe6df972db4081dd76feb7871fce996118d Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 1 Jul 2019 10:38:40 -0700
Subject: bpf: sync bpf.h to tools/

Sync user_ip6 & msg_src_ip6 comments.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index cecf42c871d4..e692bb21846f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3244,7 +3244,7 @@ struct bpf_sock_addr {
 	__u32 user_ip4;		/* Allows 1,2,4-byte read and 4-byte write.
 				 * Stored in network byte order.
 				 */
-	__u32 user_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+	__u32 user_ip6[4];	/* Allows 1,2,4-byte read and 4,8-byte write.
 				 * Stored in network byte order.
 				 */
 	__u32 user_port;	/* Allows 4-byte read and write.
@@ -3253,10 +3253,10 @@ struct bpf_sock_addr {
 	__u32 family;		/* Allows 4-byte read, but no write */
 	__u32 type;		/* Allows 4-byte read, but no write */
 	__u32 protocol;		/* Allows 4-byte read, but no write */
-	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read an 4-byte write.
+	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read and 4-byte write.
 				 * Stored in network byte order.
 				 */
-	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read an 4-byte write.
+	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read and 4,8-byte write.
 				 * Stored in network byte order.
 				 */
 	__bpf_md_ptr(struct bpf_sock *, sk);
-- 
cgit v1.2.3-59-g8ed1b


From 76d950773cd2a365f492ce973429ae629d58131c Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 1 Jul 2019 10:38:41 -0700
Subject: selftests/bpf: add verifier tests for wide stores

Make sure that wide stores are allowed at proper (aligned) addresses.
Note that user_ip6 is naturally aligned on 8-byte boundary, so
correct addresses are user_ip6[0] and user_ip6[2]. msg_src_ip6 is,
however, aligned on a 4-byte bondary, so only msg_src_ip6[1]
can be wide-stored.

Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_verifier.c       | 17 +++++++++--
 tools/testing/selftests/bpf/verifier/wide_store.c | 36 +++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/verifier/wide_store.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index c5514daf8865..b0773291012a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -105,6 +105,7 @@ struct bpf_test {
 			__u64 data64[TEST_DATA_LEN / 8];
 		};
 	} retvals[MAX_TEST_RUNS];
+	enum bpf_attach_type expected_attach_type;
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -850,6 +851,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	int fd_prog, expected_ret, alignment_prevented_execution;
 	int prog_len, prog_type = test->prog_type;
 	struct bpf_insn *prog = test->insns;
+	struct bpf_load_program_attr attr;
 	int run_errs, run_successes;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
@@ -881,8 +883,17 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		pflags |= BPF_F_STRICT_ALIGNMENT;
 	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
 		pflags |= BPF_F_ANY_ALIGNMENT;
-	fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
-				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 4);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = prog_type;
+	attr.expected_attach_type = test->expected_attach_type;
+	attr.insns = prog;
+	attr.insns_cnt = prog_len;
+	attr.license = "GPL";
+	attr.log_level = 4;
+	attr.prog_flags = pflags;
+
+	fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
 	if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
 		printf("SKIP (unsupported program type %d)\n", prog_type);
 		skips++;
@@ -912,7 +923,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 			printf("FAIL\nUnexpected success to load!\n");
 			goto fail_log;
 		}
-		if (!strstr(bpf_vlog, expected_err)) {
+		if (!expected_err || !strstr(bpf_vlog, expected_err)) {
 			printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
 			      expected_err, bpf_vlog);
 			goto fail_log;
diff --git a/tools/testing/selftests/bpf/verifier/wide_store.c b/tools/testing/selftests/bpf/verifier/wide_store.c
new file mode 100644
index 000000000000..8fe99602ded4
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/wide_store.c
@@ -0,0 +1,36 @@
+#define BPF_SOCK_ADDR(field, off, res, err) \
+{ \
+	"wide store to bpf_sock_addr." #field "[" #off "]", \
+	.insns = { \
+	BPF_MOV64_IMM(BPF_REG_0, 1), \
+	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, \
+		    offsetof(struct bpf_sock_addr, field[off])), \
+	BPF_EXIT_INSN(), \
+	}, \
+	.result = res, \
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
+	.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
+	.errstr = err, \
+}
+
+/* user_ip6[0] is u64 aligned */
+BPF_SOCK_ADDR(user_ip6, 0, ACCEPT,
+	      NULL),
+BPF_SOCK_ADDR(user_ip6, 1, REJECT,
+	      "invalid bpf_context access off=12 size=8"),
+BPF_SOCK_ADDR(user_ip6, 2, ACCEPT,
+	      NULL),
+BPF_SOCK_ADDR(user_ip6, 3, REJECT,
+	      "invalid bpf_context access off=20 size=8"),
+
+/* msg_src_ip6[0] is _not_ u64 aligned */
+BPF_SOCK_ADDR(msg_src_ip6, 0, REJECT,
+	      "invalid bpf_context access off=44 size=8"),
+BPF_SOCK_ADDR(msg_src_ip6, 1, ACCEPT,
+	      NULL),
+BPF_SOCK_ADDR(msg_src_ip6, 2, REJECT,
+	      "invalid bpf_context access off=52 size=8"),
+BPF_SOCK_ADDR(msg_src_ip6, 3, REJECT,
+	      "invalid bpf_context access off=56 size=8"),
+
+#undef BPF_SOCK_ADDR
-- 
cgit v1.2.3-59-g8ed1b


From 06ec0e2c490afd2f870d89c59200540fd9acde95 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 5 Jul 2019 21:44:20 -0700
Subject: selftests/bpf: fix test_attach_probe map definition

ef99b02b23ef ("libbpf: capture value in BTF type info for BTF-defined map
defs") changed BTF-defined maps syntax, while independently merged
1e8611bbdfc9 ("selftests/bpf: add kprobe/uprobe selftests") added new
test using outdated syntax of maps. This patch fixes this test after
corresponding patch sets were merged.

Fixes: ef99b02b23ef ("libbpf: capture value in BTF type info for BTF-defined map defs")
Fixes: 1e8611bbdfc9 ("selftests/bpf: add kprobe/uprobe selftests")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/progs/test_attach_probe.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 7a7c5cd728c8..63a8dfef893b 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -6,14 +6,11 @@
 #include "bpf_helpers.h"
 
 struct {
-	int type;
-	int max_entries;
-	int *key;
-	int *value;
-} results_map SEC(".maps") = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.max_entries = 4,
-};
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4);
+	__type(key, int);
+	__type(value, int);
+} results_map SEC(".maps");
 
 SEC("kprobe/sys_nanosleep")
 int handle_sys_nanosleep_entry(struct pt_regs *ctx)
-- 
cgit v1.2.3-59-g8ed1b


From 8fc9f8bedf1bdaea48382ae2e3dd558e2b939cee Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Mon, 8 Jul 2019 14:05:46 +0100
Subject: tools: bpftool: add completion for bpftool prog "loadall"

Bash completion for proposing the "loadall" subcommand is missing. Let's
add it to the completion script.

Add a specific case to propose "load" and "loadall" for completing:

    $ bpftool prog load
                       ^ cursor is here

Otherwise, completion considers that $command is in load|loadall and
starts making related completions (file or directory names, as the
number of words on the command line is below 6), when the only suggested
keywords should be "load" and "loadall" until one has been picked and a
space entered after that to move to the next word.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/bash-completion/bpftool | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 965a8658cca3..c8f42e1fcbc9 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -342,6 +342,13 @@ _bpftool()
                 load|loadall)
                     local obj
 
+                    # Propose "load/loadall" to complete "bpftool prog load",
+                    # or bash tries to complete "load" as a filename below.
+                    if [[ ${#words[@]} -eq 3 ]]; then
+                        COMPREPLY=( $( compgen -W "load loadall" -- "$cur" ) )
+                        return 0
+                    fi
+
                     if [[ ${#words[@]} -lt 6 ]]; then
                         _filedir
                         return 0
@@ -435,7 +442,7 @@ _bpftool()
                 *)
                     [[ $prev == $object ]] && \
                         COMPREPLY=( $( compgen -W 'dump help pin attach detach \
-                            load show list tracelog run' -- "$cur" ) )
+                            load loadall show list tracelog run' -- "$cur" ) )
                     ;;
             esac
             ;;
-- 
cgit v1.2.3-59-g8ed1b


From cecaa76b2919aac2aa584ce476e9fcd5b084add5 Mon Sep 17 00:00:00 2001
From: Frank de Brabander <debrabander@gmail.com>
Date: Fri, 5 Jul 2019 13:43:14 +0200
Subject: selftests: txring_overwrite: fix incorrect test of mmap() return
 value

If mmap() fails it returns MAP_FAILED, which is defined as ((void *) -1).
The current if-statement incorrectly tests if *ring is NULL.

Fixes: 358be656406d ("selftests/net: add txring_overwrite")
Signed-off-by: Frank de Brabander <debrabander@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/txring_overwrite.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/txring_overwrite.c b/tools/testing/selftests/net/txring_overwrite.c
index fd8b1c663c39..7d9ea039450a 100644
--- a/tools/testing/selftests/net/txring_overwrite.c
+++ b/tools/testing/selftests/net/txring_overwrite.c
@@ -113,7 +113,7 @@ static int setup_tx(char **ring)
 
 	*ring = mmap(0, req.tp_block_size * req.tp_block_nr,
 		     PROT_READ | PROT_WRITE, MAP_SHARED, fdt, 0);
-	if (!*ring)
+	if (*ring == MAP_FAILED)
 		error(1, errno, "mmap");
 
 	return fdt;
-- 
cgit v1.2.3-59-g8ed1b


From bc2d8afecbec33bd4549ce1ef904383fde21d385 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Wed, 3 Jul 2019 13:50:34 +0200
Subject: selftests/bpf: fix test_reuseport_array on s390

Fix endianness issue: passing a pointer to 64-bit fd as a 32-bit key
does not work on big-endian architectures. So cast fd to 32-bits when
necessary.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_maps.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index a3fbc571280a..5443b9bd75ed 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1418,7 +1418,7 @@ static void test_map_wronly(void)
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
 }
 
-static void prepare_reuseport_grp(int type, int map_fd,
+static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
 				  __s64 *fds64, __u64 *sk_cookies,
 				  unsigned int n)
 {
@@ -1428,6 +1428,8 @@ static void prepare_reuseport_grp(int type, int map_fd,
 	const int optval = 1;
 	unsigned int i;
 	u64 sk_cookie;
+	void *value;
+	__s32 fd32;
 	__s64 fd64;
 	int err;
 
@@ -1449,8 +1451,14 @@ static void prepare_reuseport_grp(int type, int map_fd,
 		      "err:%d errno:%d\n", err, errno);
 
 		/* reuseport_array does not allow unbound sk */
-		err = bpf_map_update_elem(map_fd, &index0, &fd64,
-					  BPF_ANY);
+		if (map_elem_size == sizeof(__u64))
+			value = &fd64;
+		else {
+			assert(map_elem_size == sizeof(__u32));
+			fd32 = (__s32)fd64;
+			value = &fd32;
+		}
+		err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
 		CHECK(err != -1 || errno != EINVAL,
 		      "reuseport array update unbound sk",
 		      "sock_type:%d err:%d errno:%d\n",
@@ -1478,7 +1486,7 @@ static void prepare_reuseport_grp(int type, int map_fd,
 			 * reuseport_array does not allow
 			 * non-listening tcp sk.
 			 */
-			err = bpf_map_update_elem(map_fd, &index0, &fd64,
+			err = bpf_map_update_elem(map_fd, &index0, value,
 						  BPF_ANY);
 			CHECK(err != -1 || errno != EINVAL,
 			      "reuseport array update non-listening sk",
@@ -1541,7 +1549,7 @@ static void test_reuseport_array(void)
 	for (t = 0; t < ARRAY_SIZE(types); t++) {
 		type = types[t];
 
-		prepare_reuseport_grp(type, map_fd, grpa_fds64,
+		prepare_reuseport_grp(type, map_fd, sizeof(__u64), grpa_fds64,
 				      grpa_cookies, ARRAY_SIZE(grpa_fds64));
 
 		/* Test BPF_* update flags */
@@ -1649,7 +1657,8 @@ static void test_reuseport_array(void)
 				sizeof(__u32), sizeof(__u32), array_size, 0);
 	CHECK(map_fd == -1, "reuseport array create",
 	      "map_fd:%d, errno:%d\n", map_fd, errno);
-	prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
+	prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
+			      &sk_cookie, 1);
 	fd = fd64;
 	err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
 	CHECK(err == -1, "reuseport array update 32 bit fd",
-- 
cgit v1.2.3-59-g8ed1b


From 2800f2485417a80633285503c782dd9416df85e4 Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Sat, 6 Jul 2019 10:55:19 -0400
Subject: selftests: forwarding: Test multipath hashing on inner IP pkts for
 GRE tunnel

Add selftest scripts for multipath hashing on inner IP pkts when there
is a single GRE tunnel but there are multiple underlay routes to reach
the other end of the tunnel.

Four cases are covered in these scripts:
    - IPv4 inner, IPv4 outer
    - IPv6 inner, IPv4 outer
    - IPv4 inner, IPv6 outer
    - IPv6 inner, IPv6 outer

Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/forwarding/gre_inner_v4_multipath.sh       | 305 ++++++++++++++++++++
 .../net/forwarding/gre_inner_v6_multipath.sh       | 306 +++++++++++++++++++++
 .../net/forwarding/ip6gre_inner_v4_multipath.sh    | 304 ++++++++++++++++++++
 .../net/forwarding/ip6gre_inner_v6_multipath.sh    | 305 ++++++++++++++++++++
 4 files changed, 1220 insertions(+)
 create mode 100755 tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
 create mode 100755 tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
 create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
 create mode 100755 tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
new file mode 100755
index 000000000000..e4009f658003
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
@@ -0,0 +1,305 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv4
+# GRE tunnel. The tunnel carries IPv4 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# | 192.0.3.{2-62}/24 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.3.1/24                          |
+# |                                            |
+# |  + g1 (gre)                                |
+# |    loc=192.0.2.65                          |
+# |    rem=192.0.2.66 --.                      |
+# |    tos=inherit      |                      |
+# |                     v                      |
+# |                     + $ul1                 |
+# |                     | 192.0.2.129/28       |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# | SW2                 |                      |
+# |               $ul21 +                      |
+# |      192.0.2.130/28                        |
+# |                   |                        |
+# !   ________________|_____                   |
+# |  /                      \                  |
+# |  |                      |                  |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan) |
+# |  | 192.0.2.145/28       | 192.0.2.161/28   |
+# |  |                      |                  |
+# +--|----------------------|------------------+
+#    |                      |
+# +--|----------------------|------------------+
+# |  |                      |                  |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan) |
+# |  | 192.0.2.146/28       | 192.0.2.162/28   |
+# |  |                      |                  |
+# |  \______________________/                  |
+# |                   |                        |
+# |                   |                        |
+# |               $ul31 +                      |
+# |      192.0.2.177/28 |                  SW3 |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# |                     + $ul4                 |
+# |                     ^ 192.0.2.178/28       |
+# |                     |                      |
+# |  + g2 (gre)         |                      |
+# |    loc=192.0.2.66   |                      |
+# |    rem=192.0.2.65 --'                      |
+# |    tos=inherit                             |
+# |                                            |
+# |               $ol4 +                       |
+# |       192.0.4.1/24 |                   SW4 |
+# +--------------------|-----------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |  192.0.4.{2-62}/24        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	multipath_ipv4
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.3.2/24
+	ip route add vrf v$h1 192.0.4.0/24 via 192.0.3.1
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.4.0/24 via 192.0.3.1
+	simple_if_fini $h1 192.0.3.2/24
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.3.1/24
+	__simple_if_init $ul1 v$ol1 192.0.2.129/28
+
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	ip route add vrf v$ol1 192.0.4.0/24 nexthop dev g1
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 192.0.4.0/24
+
+	ip route del vrf v$ol1 192.0.2.66/32
+	__simple_if_fini g1 192.0.2.65/32
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 192.0.2.129/28
+	simple_if_fini $ol1 192.0.3.1/24
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 192.0.2.130/28
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 192.0.2.145/28
+	vlan_create $ul22 222 v$ul21 192.0.2.161/28
+
+	ip route add vrf v$ul21 192.0.2.65/32 via 192.0.2.129
+	ip route add vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+}
+
+sw2_destroy()
+{
+	ip route del vrf v$ul21 192.0.2.66/32
+	ip route del vrf v$ul21 192.0.2.65/32
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 192.0.2.130/28
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 192.0.2.177/28
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 192.0.2.146/28
+	vlan_create $ul32 222 v$ul31 192.0.2.162/28
+
+	ip route add vrf v$ul31 192.0.2.66/32 via 192.0.2.178
+	ip route add vrf v$ul31 192.0.2.65/32 \
+	   nexthop via 192.0.2.145 \
+	   nexthop via 192.0.2.161
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip route del vrf v$ul31 192.0.2.65/32
+	ip route del vrf v$ul31 192.0.2.66/32
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 192.0.2.177/28
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 192.0.4.1/24
+	__simple_if_init $ul4 v$ol4 192.0.2.178/28
+
+	tunnel_create g2 gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 192.0.2.66/32
+	ip route add vrf v$ol4 192.0.2.65/32 via 192.0.2.177
+
+	ip route add vrf v$ol4 192.0.3.0/24 nexthop dev g2
+}
+
+sw4_destroy()
+{
+	ip route del vrf v$ol4 192.0.3.0/24
+
+	ip route del vrf v$ol4 192.0.2.65/32
+	__simple_if_fini g2 192.0.2.66/32
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 192.0.2.178/28
+	simple_if_fini $ol4 192.0.4.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.4.2/24
+	ip route add vrf v$h2 192.0.3.0/24 via 192.0.4.1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.3.0/24 via 192.0.4.1
+	simple_if_fini $h2 192.0.4.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 weight $weight1 \
+	   nexthop via 192.0.2.162 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.4.2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 over GRE over IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
new file mode 100755
index 000000000000..e449475c4d3e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
@@ -0,0 +1,306 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv4
+# GRE tunnel. The tunnel carries IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |  2001:db8:1::2/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |  2001:db8:1::1/64                          |
+# |                                            |
+# |  + g1 (gre)                                |
+# |    loc=192.0.2.65                          |
+# |    rem=192.0.2.66 --.                      |
+# |    tos=inherit      |                      |
+# |                     v                      |
+# |                     + $ul1                 |
+# |                     | 192.0.2.129/28       |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# | SW2                 |                      |
+# |               $ul21 +                      |
+# |      192.0.2.130/28                        |
+# |                   |                        |
+# !   ________________|_____                   |
+# |  /                      \                  |
+# |  |                      |                  |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan) |
+# |  | 192.0.2.145/28       | 192.0.2.161/28   |
+# |  |                      |                  |
+# +--|----------------------|------------------+
+#    |                      |
+# +--|----------------------|------------------+
+# |  |                      |                  |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan) |
+# |  | 192.0.2.146/28       | 192.0.2.162/28   |
+# |  |                      |                  |
+# |  \______________________/                  |
+# |                   |                        |
+# |                   |                        |
+# |               $ul31 +                      |
+# |      192.0.2.177/28 |                  SW3 |
+# +---------------------|----------------------+
+#                       |
+# +---------------------|----------------------+
+# |                     + $ul4                 |
+# |                     ^ 192.0.2.178/28       |
+# |                     |                      |
+# |  + g2 (gre)         |                      |
+# |    loc=192.0.2.66   |                      |
+# |    rem=192.0.2.65 --'                      |
+# |    tos=inherit                             |
+# |                                            |
+# |               $ol4 +                       |
+# |   2001:db8:2::1/64 |                   SW4 |
+# +--------------------|-----------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |   2001:db8:2::2/64        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv6
+	multipath_ipv6
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::2/64
+	ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+	simple_if_fini $h1 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 192.0.2.129/28
+
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+
+	ip route del vrf v$ol1 192.0.2.66/32
+	__simple_if_fini g1 192.0.2.65/32
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 192.0.2.129/28
+	simple_if_fini $ol1 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 192.0.2.130/28
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 192.0.2.145/28
+	vlan_create $ul22 222 v$ul21 192.0.2.161/28
+
+	ip route add vrf v$ul21 192.0.2.65/32 via 192.0.2.129
+	ip route add vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+}
+
+sw2_destroy()
+{
+	ip route del vrf v$ul21 192.0.2.66/32
+	ip route del vrf v$ul21 192.0.2.65/32
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 192.0.2.130/28
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 192.0.2.177/28
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 192.0.2.146/28
+	vlan_create $ul32 222 v$ul31 192.0.2.162/28
+
+	ip route add vrf v$ul31 192.0.2.66/32 via 192.0.2.178
+	ip route add vrf v$ul31 192.0.2.65/32 \
+	   nexthop via 192.0.2.145 \
+	   nexthop via 192.0.2.161
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip route del vrf v$ul31 192.0.2.65/32
+	ip route del vrf v$ul31 192.0.2.66/32
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 192.0.2.177/28
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 192.0.2.178/28
+
+	tunnel_create g2 gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 192.0.2.66/32
+	ip route add vrf v$ol4 192.0.2.65/32 via 192.0.2.177
+
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+
+	ip route del vrf v$ol4 192.0.2.65/32
+	__simple_if_fini g2 192.0.2.66/32
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 192.0.2.178/28
+	simple_if_fini $ol4 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:db8:2::2/64
+	ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	simple_if_fini $h2 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 weight $weight1 \
+	   nexthop via 192.0.2.162 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
+	       -B "2001:db8:2::2-2001:db8:2::1e" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 192.0.2.66/32 \
+	   nexthop via 192.0.2.146 \
+	   nexthop via 192.0.2.162
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv6()
+{
+	ping_test $h1 2001:db8:2::2
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 over GRE over IPv4 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
new file mode 100755
index 000000000000..a257979d3fc5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
@@ -0,0 +1,304 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv6
+# GRE tunnel. The tunnel carries IPv4 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# | 192.0.3.{2-62}/24 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-------------------------+
+# | SW1               |                         |
+# |              $ol1 +                         |
+# |      192.0.3.1/24                           |
+# |                                             |
+# |  + g1 (gre)                                 |
+# |    loc=2001:db8:40::1                       |
+# |    rem=2001:db8:40::2 --.                   |
+# |    tos=inherit          |                   |
+# |                         v                   |
+# |                         + $ul1              |
+# |                         | 2001:db8:80::1/64 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# | SW2                     |                   |
+# |                   $ul21 +                   |
+# |       2001:db8:80::2/64                     |
+# |                   |                         |
+# !   ________________|_____                    |
+# |  /                      \                   |
+# |  |                      |                   |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan)  |
+# |  | 2001:db8:81::1/64    | 2001:db8:82::1/64 |
+# |  |                      |                   |
+# +--|----------------------|-------------------+
+#    |                      |
+# +--|----------------------|-------------------+
+# |  |                      |                   |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan)  |
+# |  | 2001:db8:81::2/64    | 2001:db8:82::2/64 |
+# |  |                      |                   |
+# |  \______________________/                   |
+# |                   |                         |
+# |                   |                         |
+# |                   $ul31 +                   |
+# |       2001:db8:83::2/64 |               SW3 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# |                         + $ul4              |
+# |                         ^ 2001:db8:83::1/64 |
+# |  + g2 (gre)             |                   |
+# |    loc=2001:db8:40::2   |                   |
+# |    rem=2001:db8:40::1 --'                   |
+# |    tos=inherit                              |
+# |                                             |
+# |               $ol4 +                        |
+# |       192.0.4.1/24 |                    SW4 |
+# +--------------------|------------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |  192.0.4.{2-62}/24        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	multipath_ipv4
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.3.2/24
+	ip route add vrf v$h1 192.0.4.0/24 via 192.0.3.1
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.4.0/24 via 192.0.3.1
+	simple_if_fini $h1 192.0.3.2/24
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.3.1/24
+	__simple_if_init $ul1 v$ol1 2001:db8:80::1/64
+
+	tunnel_create g1 ip6gre 2001:db8:40::1 2001:db8:40::2 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 2001:db8:40::1/128
+	ip -6 route add vrf v$ol1 2001:db8:40::2/128 via 2001:db8:80::2
+
+	ip route add vrf v$ol1 192.0.4.0/24 nexthop dev g1
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 192.0.4.0/24
+
+	ip -6 route del vrf v$ol1 2001:db8:40::2/128
+	__simple_if_fini g1 2001:db8:40::1/128
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 2001:db8:80::1/64
+	simple_if_fini $ol1 192.0.3.1/24
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 2001:db8:80::2/64
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 2001:db8:81::1/64
+	vlan_create $ul22 222 v$ul21 2001:db8:82::1/64
+
+	ip -6 route add vrf v$ul21 2001:db8:40::1/128 via 2001:db8:80::1
+	ip -6 route add vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$ul21 2001:db8:40::2/128
+	ip -6 route del vrf v$ul21 2001:db8:40::1/128
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 2001:db8:80::2/64
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 2001:db8:83::2/64
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 2001:db8:81::2/64
+	vlan_create $ul32 222 v$ul31 2001:db8:82::2/64
+
+	ip -6 route add vrf v$ul31 2001:db8:40::2/128 via 2001:db8:83::1
+	ip -6 route add vrf v$ul31 2001:db8:40::1/128 \
+	   nexthop via 2001:db8:81::1 \
+	   nexthop via 2001:db8:82::1
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip -6 route del vrf v$ul31 2001:db8:40::1/128
+	ip -6 route del vrf v$ul31 2001:db8:40::2/128
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 2001:Db8:83::2/64
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 192.0.4.1/24
+	__simple_if_init $ul4 v$ol4 2001:db8:83::1/64
+
+	tunnel_create g2 ip6gre 2001:db8:40::2 2001:db8:40::1 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 2001:db8:40::2/128
+	ip -6 route add vrf v$ol4 2001:db8:40::1/128 via 2001:db8:83::2
+
+	ip route add vrf v$ol4 192.0.3.0/24 nexthop dev g2
+}
+
+sw4_destroy()
+{
+	ip route del vrf v$ol4 192.0.3.0/24
+
+	ip -6 route del vrf v$ol4 2001:db8:40::1/128
+	__simple_if_fini g2 2001:db8:40::2/128
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 2001:db8:83::1/64
+	simple_if_fini $ol4 192.0.4.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.4.2/24
+	ip route add vrf v$h2 192.0.3.0/24 via 192.0.4.1
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.3.0/24 via 192.0.4.1
+	simple_if_fini $h2 192.0.4.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 weight $weight1 \
+	   nexthop via 2001:db8:82::2 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.4.2
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 over GRE over IPv6 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
new file mode 100755
index 000000000000..d208f5243ade
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
@@ -0,0 +1,305 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when there are multiple routes between an IPv6
+# GRE tunnel. The tunnel carries IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |  2001:db8:1::2/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-------------------------+
+# | SW1               |                         |
+# |              $ol1 +                         |
+# |  2001:db8:1::1/64                           |
+# |                                             |
+# |  + g1 (gre)                                 |
+# |    loc=2001:db8:40::1                       |
+# |    rem=2001:db8:40::2 --.                   |
+# |    tos=inherit          |                   |
+# |                         v                   |
+# |                         + $ul1              |
+# |                         | 2001:db8:80::1/64 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# | SW2                     |                   |
+# |                   $ul21 +                   |
+# |       2001:db8:80::2/64                     |
+# |                   |                         |
+# !   ________________|_____                    |
+# |  /                      \                   |
+# |  |                      |                   |
+# |  + $ul22.111 (vlan)     + $ul22.222 (vlan)  |
+# |  | 2001:db8:81::1/64    | 2001:db8:82::1/64 |
+# |  |                      |                   |
+# +--|----------------------|-------------------+
+#    |                      |
+# +--|----------------------|-------------------+
+# |  |                      |                   |
+# |  + $ul32.111 (vlan)     + $ul32.222 (vlan)  |
+# |  | 2001:db8:81::2/64    | 2001:db8:82::2/64 |
+# |  |                      |                   |
+# |  \______________________/                   |
+# |                   |                         |
+# |                   |                         |
+# |                   $ul31 +                   |
+# |       2001:db8:83::2/64 |               SW3 |
+# +-------------------------|-------------------+
+#                           |
+# +-------------------------|-------------------+
+# |                         + $ul4              |
+# |                         ^ 2001:db8:83::1/64 |
+# |  + g2 (gre)             |                   |
+# |    loc=2001:db8:40::2   |                   |
+# |    rem=2001:db8:40::1 --'                   |
+# |    tos=inherit                              |
+# |                                             |
+# |               $ol4 +                        |
+# |   2001:db8:2::1/64 |                    SW4 |
+# +--------------------|------------------------+
+#                      |
+# +--------------------|---------+
+# |                    |         |
+# |                $h2 +         |
+# |   2001:db8:2::2/64        H2 |
+# +------------------------------+
+
+ALL_TESTS="
+	ping_ipv6
+	multipath_ipv6
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::2/64
+	ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::1
+	simple_if_fini $h1 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 2001:db8:1::1/64
+	__simple_if_init $ul1 v$ol1 2001:db8:80::1/64
+
+	tunnel_create g1 ip6gre 2001:db8:40::1 2001:db8:40::2 tos inherit dev v$ol1
+	__simple_if_init g1 v$ol1 2001:db8:40::1/128
+	ip -6 route add vrf v$ol1 2001:db8:40::2/128 via 2001:db8:80::2
+
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+
+	ip -6 route del vrf v$ol1 2001:db8:40::2/128
+	__simple_if_fini g1 2001:db8:40::1/128
+	tunnel_destroy g1
+
+	__simple_if_fini $ul1 2001:db8:80::1/64
+	simple_if_fini $ol1 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+	simple_if_init $ul21 2001:db8:80::2/64
+	__simple_if_init $ul22 v$ul21
+	vlan_create $ul22 111 v$ul21 2001:db8:81::1/64
+	vlan_create $ul22 222 v$ul21 2001:db8:82::1/64
+
+	ip -6 route add vrf v$ul21 2001:db8:40::1/128 via 2001:db8:80::1
+	ip -6 route add vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+}
+
+sw2_destroy()
+{
+	ip -6 route del vrf v$ul21 2001:db8:40::2/128
+	ip -6 route del vrf v$ul21 2001:db8:40::1/128
+
+	vlan_destroy $ul22 222
+	vlan_destroy $ul22 111
+	__simple_if_fini $ul22
+	simple_if_fini $ul21 2001:db8:80::2/64
+}
+
+sw3_create()
+{
+	simple_if_init $ul31 2001:db8:83::2/64
+	__simple_if_init $ul32 v$ul31
+	vlan_create $ul32 111 v$ul31 2001:db8:81::2/64
+	vlan_create $ul32 222 v$ul31 2001:db8:82::2/64
+
+	ip -6 route add vrf v$ul31 2001:db8:40::2/128 via 2001:db8:83::1
+	ip -6 route add vrf v$ul31 2001:db8:40::1/128 \
+	   nexthop via 2001:db8:81::1 \
+	   nexthop via 2001:db8:82::1
+
+	tc qdisc add dev $ul32 clsact
+	tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+	tc qdisc del dev $ul32 clsact
+
+	ip -6 route del vrf v$ul31 2001:db8:40::1/128
+	ip -6 route del vrf v$ul31 2001:db8:40::2/128
+
+	vlan_destroy $ul32 222
+	vlan_destroy $ul32 111
+	__simple_if_fini $ul32
+	simple_if_fini $ul31 2001:Db8:83::2/64
+}
+
+sw4_create()
+{
+	simple_if_init $ol4 2001:db8:2::1/64
+	__simple_if_init $ul4 v$ol4 2001:db8:83::1/64
+
+	tunnel_create g2 ip6gre 2001:db8:40::2 2001:db8:40::1 tos inherit dev v$ol4
+	__simple_if_init g2 v$ol4 2001:db8:40::2/128
+	ip -6 route add vrf v$ol4 2001:db8:40::1/128 via 2001:db8:83::2
+
+	ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+	ip -6 route del vrf v$ol4 2001:db8:1::/64
+
+	ip -6 route del vrf v$ol4 2001:db8:40::1/128
+	__simple_if_fini g2 2001:db8:40::2/128
+	tunnel_destroy g2
+
+	__simple_if_fini $ul4 2001:db8:83::1/64
+	simple_if_fini $ol4 2001:db8:2::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 2001:db8:2::2/64
+	ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+	simple_if_fini $h2 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+
+	ol1=${NETIFS[p2]}
+	ul1=${NETIFS[p3]}
+
+	ul21=${NETIFS[p4]}
+	ul22=${NETIFS[p5]}
+
+	ul32=${NETIFS[p6]}
+	ul31=${NETIFS[p7]}
+
+	ul4=${NETIFS[p8]}
+	ol4=${NETIFS[p9]}
+
+	h2=${NETIFS[p10]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	sw3_create
+	sw4_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw4_destroy
+	sw3_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath6_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv6.fib_multipath_hash_policy 2
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 weight $weight1 \
+	   nexthop via 2001:db8:82::2 weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
+	       -B "2001:db8:2::2-2001:db8:2::1e" \
+	       -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+	sleep 1
+
+	local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ul21 2001:db8:40::2/128 \
+	   nexthop via 2001:db8:81::2 \
+	   nexthop via 2001:db8:82::2
+	sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv6()
+{
+	ping_test $h1 2001:db8:2::2
+}
+
+multipath_ipv6()
+{
+	log_info "Running IPv6 over GRE over IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-59-g8ed1b


From 6fb8dbca8e9e6de79ce25272eadd9cd96e05debf Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Sun, 7 Jul 2019 15:01:58 +0100
Subject: tc-tests: actions: add MPLS tests

Add a new series of selftests to verify the functionality of act_mpls in
TC.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |    1 +
 .../tc-testing/tc-tests/actions/mpls.json          | 1088 ++++++++++++++++++++
 2 files changed, 1089 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 1adc4f9bb795..7c551968d184 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -42,6 +42,7 @@ CONFIG_NET_ACT_CTINFO=m
 CONFIG_NET_ACT_SKBMOD=m
 CONFIG_NET_ACT_IFE=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_MPLS=m
 CONFIG_NET_IFE_SKBMARK=m
 CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
new file mode 100644
index 000000000000..e31a080edc49
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
@@ -0,0 +1,1088 @@
+[
+    {
+        "id": "a933",
+        "name": "Add MPLS dec_ttl action with pipe opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl pipe index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*pipe.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "08d1",
+        "name": "Add mpls dec_ttl action with pass opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl pass index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mpls index 8",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*pass.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "d786",
+        "name": "Add mpls dec_ttl action with drop opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl drop index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mpls index 8",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*drop.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "f334",
+        "name": "Add mpls dec_ttl action with reclassify opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl reclassify index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mpls index 8",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*reclassify.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "29bd",
+        "name": "Add mpls dec_ttl action with continue opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl continue index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mpls index 8",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*continue.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "48df",
+        "name": "Add mpls dec_ttl action with jump opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl jump 10 index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*jump 10.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "62eb",
+        "name": "Add mpls dec_ttl action with trap opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl trap index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl trap.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "9118",
+        "name": "Add mpls dec_ttl action with invalid opcode",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl foo index 8",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*foo.*index 8 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6ce1",
+        "name": "Add mpls dec_ttl action with label (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl label 20",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*label.*20.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "352f",
+        "name": "Add mpls dec_ttl action with tc (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl tc 3",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*tc.*3.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "fa1c",
+        "name": "Add mpls dec_ttl action with ttl (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl ttl 20",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*ttl.*20.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6b79",
+        "name": "Add mpls dec_ttl action with bos (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls dec_ttl bos 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*dec_ttl.*bos.*1.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "d4c4",
+        "name": "Add mpls pop action with ip proto",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*protocol.*ip.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "92fe",
+        "name": "Add mpls pop action with mpls proto",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop protocol mpls_mc",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*protocol.*mpls_mc.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "7e23",
+        "name": "Add mpls pop action with no protocol (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6182",
+        "name": "Add mpls pop action with label (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4 label 20",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*label.*20.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6475",
+        "name": "Add mpls pop action with tc (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4 tc 3",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*tc.*3.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "067b",
+        "name": "Add mpls pop action with ttl (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4 ttl 20",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*ttl.*20.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "7316",
+        "name": "Add mpls pop action with bos (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4 bos 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*bos.*1.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "38cc",
+        "name": "Add mpls push action with label",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push label 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*ttl.*[0-9]+.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "c281",
+        "name": "Add mpls push action with mpls_mc protocol",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push protocol mpls_mc label 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_mc.*label.*20.*ttl.*[0-9]+.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "5db4",
+        "name": "Add mpls push action with label, tc and ttl",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push label 20 tc 3 ttl 128",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*tc.*3.*ttl.*128.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "16eb",
+        "name": "Add mpls push action with label and bos",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push label 20 bos 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*bos.*1.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "d69d",
+        "name": "Add mpls push action with no label (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "e8e4",
+        "name": "Add mpls push action with ipv4 protocol (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push protocol ipv4 label 20",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*ttl.*[0-9]+.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ecd0",
+        "name": "Add mpls push action with out of range label (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push label 1048576",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*1048576.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "d303",
+        "name": "Add mpls push action with out of range tc (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push label 20 tc 8",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*tc.*8.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "fd6e",
+        "name": "Add mpls push action with ttl of 0 (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls push label 20 ttl 0",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*ttl.*0.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "19e9",
+        "name": "Add mpls mod action with mpls label",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod label 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*label.*20.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "1fde",
+        "name": "Add mpls mod action with max mpls label",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod label 0xfffff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*label.*1048575.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "0c50",
+        "name": "Add mpls mod action with mpls label exceeding max (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod label 0x100000",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*label.*1048576.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "10b6",
+        "name": "Add mpls mod action with mpls label of MPLS_LABEL_IMPLNULL (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod label 3",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*label.*3.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "57c9",
+        "name": "Add mpls mod action with mpls min tc",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod tc 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*tc.*0.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "6872",
+        "name": "Add mpls mod action with mpls max tc",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod tc 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*tc.*7.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "a70a",
+        "name": "Add mpls mod action with mpls tc exceeding max (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod tc 8",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*tc.*4.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6ed5",
+        "name": "Add mpls mod action with mpls ttl",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod ttl 128",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*128.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "b80f",
+        "name": "Add mpls mod action with mpls max ttl",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod ttl 255",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*255.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "8864",
+        "name": "Add mpls mod action with mpls min ttl",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod ttl 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*1.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "6c06",
+        "name": "Add mpls mod action with mpls ttl of 0 (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod ttl 0",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*0.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "b5d8",
+        "name": "Add mpls mod action with mpls ttl exceeding max (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod ttl 256",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*256.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "451f",
+        "name": "Add mpls mod action with mpls max bos",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod bos 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*bos.*1.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "a1ed",
+        "name": "Add mpls mod action with mpls min bos",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod bos 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*bos.*0.*pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "3dcf",
+        "name": "Add mpls mod action with mpls bos exceeding max (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod bos 2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*bos.*2.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "db7c",
+        "name": "Add mpls mod action with protocol (invalid)",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mpls mod protocol ipv4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*modify.*protocol.*ip.*pipe",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "b070",
+        "name": "Replace existing mpls push action with new ID",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mpls push label 20 pipe index 12"
+        ],
+        "cmdUnderTest": "$TC actions replace action mpls push label 30 pipe index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mpls index 12",
+        "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*30.*pipe.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mpls"
+        ]
+    },
+    {
+        "id": "6cce",
+        "name": "Delete mpls pop action",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mpls pop protocol ipv4 index 44"
+        ],
+        "cmdUnderTest": "$TC actions del action mpls index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*pop.*index 44 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "d138",
+        "name": "Flush mpls actions",
+        "category": [
+            "actions",
+            "mpls"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mpls",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mpls push label 10 index 10",
+            "$TC actions add action mpls push label 20 index 20",
+            "$TC actions add action mpls push label 30 index 30",
+            "$TC actions add action mpls push label 40 index 40"
+        ],
+        "cmdUnderTest": "$TC actions flush action mpls",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mpls",
+        "matchPattern": "action order [0-9]+: mpls.*push.*",
+        "matchCount": "0",
+        "teardown": []
+    }
+]
-- 
cgit v1.2.3-59-g8ed1b


From 6e52fca36c67c608fd8590826e946e1d0da83261 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Tue, 9 Jul 2019 10:30:51 +0300
Subject: tc-tests: Add tc action ct tests

Add 13 tests ensuring the command line is doing what is supposed to do.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/actions/ct.json  | 314 +++++++++++++++++++++
 1 file changed, 314 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/ct.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
new file mode 100644
index 000000000000..62b82fe10c89
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -0,0 +1,314 @@
+[
+    {
+        "id": "696a",
+        "name": "Add simple ct action",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct zone 0 pipe.*index 42 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "9f20",
+        "name": "Add ct clear action",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct clear index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct clear pipe.*index 42 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "5bea",
+        "name": "Try ct with zone",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct zone 404 index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct zone 404 pipe.*index 42 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "d5d6",
+        "name": "Try ct with zone, commit",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct zone 404 commit index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct commit zone 404 pipe.*index 42 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "029f",
+        "name": "Try ct with zone, commit, mark",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct zone 404 commit mark 0x42 index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct commit mark 66 zone 404 pipe.*index 42 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "a58d",
+        "name": "Try ct with zone, commit, mark, nat",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct zone 404 commit mark 0x42 nat src addr 5.5.5.7 index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct commit mark 66 zone 404 nat src addr 5.5.5.7 pipe.*index 42 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "901b",
+        "name": "Try ct with full nat ipv4 range syntax",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct commit nat src addr 5.5.5.7-5.5.6.0 port 1000-2000 index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct commit zone 0 nat src addr 5.5.5.7-5.5.6.0 port 1000-2000 pipe.*index 44 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "072b",
+        "name": "Try ct with full nat ipv6 syntax",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct commit nat src addr 2001::1 port 1000-2000 index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct commit zone 0 nat src addr 2001::1 port 1000-2000 pipe.*index 44 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "3420",
+        "name": "Try ct with full nat ipv6 range syntax",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct commit nat src addr 2001::1-2001::10 port 1000-2000 index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct commit zone 0 nat src addr 2001::1-2001::10 port 1000-2000 pipe.*index 44 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "4470",
+        "name": "Try ct with full nat ipv6 range syntax + force",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct commit force nat src addr 2001::1-2001::10 port 1000-2000 index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct commit force zone 0 nat src addr 2001::1-2001::10 port 1000-2000 pipe.*index 44 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "5d88",
+        "name": "Try ct with label",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct label 123123 index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct zone 0 label 12312300000000000000000000000000 pipe.*index 44 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "04d4",
+        "name": "Try ct with label with mask",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct label 12312300000000000000000000000001/ffffffff000000000000000000000001 index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct zone 0 label 12312300000000000000000000000001/ffffffff000000000000000000000001 pipe.*index 44 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    },
+    {
+        "id": "9751",
+        "name": "Try ct with mark + mask",
+        "category": [
+            "actions",
+            "ct"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ct",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ct mark 0x42/0xf0 index 42",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action ct",
+        "matchPattern": "action order [0-9]*: ct mark 66/0xf0 zone 0 pipe.*index 42 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ct"
+        ]
+    }
+]
-- 
cgit v1.2.3-59-g8ed1b


From a7d50a0dd81b09dc13fa9e4b55765e8684bc8226 Mon Sep 17 00:00:00 2001
From: Lucas Bates <lucasb@mojatatu.com>
Date: Mon, 8 Jul 2019 21:34:26 -0400
Subject: tc-testing: Allow tdc plugins to see test case data

Instead of only passing the test case name and ID, pass the
entire current test case down to the plugins. This change
allows plugins to start accepting commands and directives
from the test cases themselves, for greater flexibility
in testing.

Signed-off-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/TdcPlugin.py |  5 ++---
 tools/testing/selftests/tc-testing/tdc.py       | 10 +++++-----
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
index b980a565fa89..79f3ca8617c9 100644
--- a/tools/testing/selftests/tc-testing/TdcPlugin.py
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -18,12 +18,11 @@ class TdcPlugin:
         if self.args.verbose > 1:
             print(' -- {}.post_suite'.format(self.sub_class))
 
-    def pre_case(self, testid, test_name, test_skip):
+    def pre_case(self, caseinfo, test_skip):
         '''run commands before test_runner does one test'''
         if self.args.verbose > 1:
             print(' -- {}.pre_case'.format(self.sub_class))
-        self.args.testid = testid
-        self.args.test_name = test_name
+        self.args.caseinfo = caseinfo
         self.args.test_skip = test_skip
 
     def post_case(self):
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 678182a2676d..f04321ace9fb 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -122,15 +122,15 @@ class PluginMgr:
         for pgn_inst in reversed(self.plugin_instances):
             pgn_inst.post_suite(index)
 
-    def call_pre_case(self, testid, test_name, *, test_skip=False):
+    def call_pre_case(self, caseinfo, *, test_skip=False):
         for pgn_inst in self.plugin_instances:
             try:
-                pgn_inst.pre_case(testid, test_name, test_skip)
+                pgn_inst.pre_case(caseinfo, test_skip)
             except Exception as ee:
                 print('exception {} in call to pre_case for {} plugin'.
                       format(ee, pgn_inst.__class__))
                 print('test_ordinal is {}'.format(test_ordinal))
-                print('testid is {}'.format(testid))
+                print('testid is {}'.format(caseinfo['id']))
                 raise
 
     def call_post_case(self):
@@ -261,14 +261,14 @@ def run_one_test(pm, args, index, tidx):
             res = TestResult(tidx['id'], tidx['name'])
             res.set_result(ResultState.skip)
             res.set_errormsg('Test case designated as skipped.')
-            pm.call_pre_case(tidx['id'], tidx['name'], test_skip=True)
+            pm.call_pre_case(tidx, test_skip=True)
             pm.call_post_execute()
             return res
 
     # populate NAMES with TESTID for this test
     NAMES['TESTID'] = tidx['id']
 
-    pm.call_pre_case(tidx['id'], tidx['name'])
+    pm.call_pre_case(tidx)
     prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
 
     if (args.verbose > 0):
-- 
cgit v1.2.3-59-g8ed1b


From 14e5175e9e04a982ef044a0dcdf1484643df1d3a Mon Sep 17 00:00:00 2001
From: Lucas Bates <lucasb@mojatatu.com>
Date: Mon, 8 Jul 2019 21:34:27 -0400
Subject: tc-testing: introduce scapyPlugin for basic traffic

The scapyPlugin allows for simple traffic generation in tdc to
test various tc features. It was tested with scapy v2.4.2, but
should work with any successive version.

In order to use the plugin's functionality, scapy must be
installed. This can be done with:
   pip3 install scapy

or to install 2.4.2:
   pip3 install scapy==2.4.2

If the plugin is unable to import the scapy module, it will
terminate the tdc run.

The plugin makes use of a new key in the test case data, 'scapy'.
This block contains three other elements: 'iface', 'count', and
'packet':

        "scapy": {
            "iface": "$DEV0",
            "count": 1,
            "packet": "Ether(type=0x800)/IP(src='16.61.16.61')/ICMP()"
        },

* iface is the name of the device on the host machine from which
  the packet(s) will be sent. Values contained within tdc_config.py's
  NAMES dict can be used here - this is useful if paired with
  nsPlugin
* count is the number of copies of this packet to be sent
* packet is a string detailing the different layers of the packet
  to be sent. If a property isn't explicitly set, scapy will set
  default values for you.

Layers in the packet info are separated by slashes. For info about
common TCP and IP properties, see:
https://blogs.sans.org/pen-testing/files/2016/04/ScapyCheatSheet_v0.2.pdf

Caution is advised when running tests using the scapy functionality,
since the plugin blindly sends the packet as defined in the test case
data.

See creating-testcases/scapy-example.json for sample test cases;
the first test is intended to pass while the second is intended to
fail.

Signed-off-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../creating-testcases/scapy-example.json          | 98 ++++++++++++++++++++++
 .../selftests/tc-testing/plugin-lib/scapyPlugin.py | 50 +++++++++++
 2 files changed, 148 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/creating-testcases/scapy-example.json
 create mode 100644 tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/creating-testcases/scapy-example.json b/tools/testing/selftests/tc-testing/creating-testcases/scapy-example.json
new file mode 100644
index 000000000000..5a9377b72d7f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/scapy-example.json
@@ -0,0 +1,98 @@
+[
+    {
+        "id": "b1e9",
+        "name": "Test matching of source IP",
+        "category": [
+            "actions",
+            "scapy"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin",
+                "scapyPlugin"
+            ]
+        },
+        "setup": [
+            [
+                "$TC qdisc del dev $DEV1 ingress",
+                0,
+                1,
+                2,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 protocol ip flower src_ip 16.61.16.61 flowid 1:1 action ok",
+        "scapy": {
+            "iface": "$DEV0",
+            "count": 1,
+            "packet": "Ether(type=0x800)/IP(src='16.61.16.61')/ICMP()"
+        },
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s -j filter ls dev $DEV1 ingress prio 3",
+        "matchJSON": [
+            {
+                "path": [
+                    1,
+                    "options",
+                    "actions",
+                    0,
+                    "stats",
+                    "packets"
+                ],
+                "value": 1
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e9c4",
+        "name": "Test matching of source IP with wrong count",
+        "category": [
+            "actions",
+            "scapy"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin",
+                "scapyPlugin"
+            ]
+        },
+        "setup": [
+            [
+                "$TC qdisc del dev $DEV1 ingress",
+                0,
+                1,
+                2,
+                255
+            ],
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 protocol ip flower src_ip 16.61.16.61 flowid 1:1 action ok",
+        "scapy": {
+            "iface": "$DEV0",
+            "count": 3,
+            "packet": "Ether(type=0x800)/IP(src='16.61.16.61')/ICMP()"
+        },
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s -j filter ls dev $DEV1 parent ffff:",
+        "matchJSON": [
+            {
+                "path": [
+                    1,
+                    "options",
+                    "actions",
+                    0,
+                    "stats",
+                    "packets"
+                ],
+                "value": 1
+            }
+        ],
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
new file mode 100644
index 000000000000..229ee185b27e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+try:
+    from scapy.all import *
+except ImportError:
+    print("Unable to import the scapy python module.")
+    print("\nIf not already installed, you may do so with:")
+    print("\t\tpip3 install scapy==2.4.2")
+    exit(1)
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'scapy/SubPlugin'
+        super().__init__()
+
+    def post_execute(self):
+        if 'scapy' not in self.args.caseinfo:
+            if self.args.verbose:
+                print('{}.post_execute: no scapy info in test case'.format(self.sub_class))
+            return
+
+        # Check for required fields
+        scapyinfo = self.args.caseinfo['scapy']
+        scapy_keys = ['iface', 'count', 'packet']
+        missing_keys = []
+        keyfail = False
+        for k in scapy_keys:
+            if k not in scapyinfo:
+                keyfail = True
+                missing_keys.add(k)
+        if keyfail:
+            print('{}: Scapy block present in the test, but is missing info:'
+                .format(self.sub_class))
+            print('{}'.format(missing_keys))
+
+        pkt = eval(scapyinfo['packet'])
+        if '$' in scapyinfo['iface']:
+            tpl = Template(scapyinfo['iface'])
+            scapyinfo['iface'] = tpl.safe_substitute(NAMES)
+        for count in range(scapyinfo['count']):
+            sendp(pkt, iface=scapyinfo['iface'])
-- 
cgit v1.2.3-59-g8ed1b