diff options
author | 2024-11-12 13:53:27 -0800 | |
---|---|---|
committer | 2024-11-12 13:53:28 -0800 | |
commit | c748a255aedfd42adc4213479f669f0f4809b85e (patch) | |
tree | ecac0eac4ab29e4524b87cb41feb5dba6f47645f /tools/testing/selftests/bpf/prog_tests/send_signal.c | |
parent | Merge branch 'libbpf-stringify-error-codes-in-log-messages' (diff) | |
parent | selftests/bpf: update send_signal to lower perf events frequency (diff) |
download | wireguard-linux-c748a255aedfd42adc4213479f669f0f4809b85e.tar.xz wireguard-linux-c748a255aedfd42adc4213479f669f0f4809b85e.zip |
Merge branch 'selftests-bpf-fix-for-bpf_signal-stalls-watchdog-for-test_progs'
Eduard Zingerman says:
====================
selftests/bpf: fix for bpf_signal stalls, watchdog for test_progs
Test case 'bpf_signal' was recently reported to stall, both on
the mailing list [1] and in CI [2]. The stall is caused by the CPU cycles
perf event not being delivered within the expected time frame, before the
test process enters a system call and waits indefinitely.
This patch-set addresses the issue in several ways:
- A watchdog timer is added to test_progs.c runner:
- it prints current sub-test name to stderr if sub-test takes longer
than 10 seconds to finish;
- it terminates process executing sub-test if sub-test takes longer
than 120 seconds to finish.
- The test case is updated to await perf event notification with a
timeout and a few retries; this serves two purposes:
- busy loops longer to increase the time frame for CPU cycles event
generation/delivery;
- makes a timeout, not stall, a worst case scenario.
- The test case is updated to lower the frequency of perf events, as a high
frequency of such events caused event generation throttling,
which in turn delayed event delivery by an amount of time sufficient
to cause test case failure.
Note:
The librt pthread-based timer API is used to implement the watchdog timer.
I chose this API over SIGALRM because signal handler execution
within the test process context was sufficient to trigger perf event
delivery for the send_signal/send_signal_nmi_thread_remote test case,
without any additional changes. Thus I concluded that a SIGALRM-based
implementation would interfere with test execution.
[1] https://lore.kernel.org/bpf/CAP01T75OUeE8E-Lw9df84dm8ag2YmHW619f1DmPSVZ5_O89+Bg@mail.gmail.com/
[2] https://github.com/kernel-patches/bpf/actions/runs/11791485271/job/32843996871
====================
Link: https://lore.kernel.org/r/20241112110906.3045278-1-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'tools/testing/selftests/bpf/prog_tests/send_signal.c')
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/send_signal.c | 35 |
1 files changed, 21 insertions, 14 deletions
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 1aed94ec14ef..1702aa592c2c 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -3,6 +3,7 @@ #include <sys/time.h> #include <sys/resource.h> #include "test_send_signal_kern.skel.h" +#include "io_helpers.h" static int sigusr1_received; @@ -24,6 +25,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, int pipe_c2p[2], pipe_p2c[2]; int err = -1, pmu_fd = -1; volatile int j = 0; + int retry_count; char buf[256]; pid_t pid; int old_prio; @@ -163,21 +165,25 @@ static void test_send_signal_common(struct perf_event_attr *attr, /* notify child that bpf program can send_signal now */ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); - /* For the remote test, the BPF program is triggered from this - * process but the other process/thread is signaled. - */ - if (remote) { - if (!attr) { - for (int i = 0; i < 10; i++) - usleep(1); - } else { - for (int i = 0; i < 100000000; i++) - j /= i + 1; + for (retry_count = 0;;) { + /* For the remote test, the BPF program is triggered from this + * process but the other process/thread is signaled. 
+ */ + if (remote) { + if (!attr) { + for (int i = 0; i < 10; i++) + usleep(1); + } else { + for (int i = 0; i < 100000000; i++) + j /= i + 1; + } } + /* wait for result */ + err = read_with_timeout(pipe_c2p[0], buf, 1, 100); + if (err == -EAGAIN && retry_count++ < 10000) + continue; + break; } - - /* wait for result */ - err = read(pipe_c2p[0], buf, 1); if (!ASSERT_GE(err, 0, "reading pipe")) goto disable_pmu; if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) { @@ -223,7 +229,8 @@ static void test_send_signal_perf(bool signal_thread, bool remote) static void test_send_signal_nmi(bool signal_thread, bool remote) { struct perf_event_attr attr = { - .sample_period = 1, + .freq = 1, + .sample_freq = 1000, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; |