diff options
author | 2022-09-01 12:11:45 -0700 | |
---|---|---|
committer | 2022-09-01 12:16:23 -0700 | |
commit | 23d86c8e02e55e511cd37e7ef2280a0030750954 (patch) | |
tree | 9e9824013d4e2ac7974944f22f2f90dfab154814 | |
parent | Merge branch 'fixes for concurrent htab updates' (diff) | |
parent | selftests/bpf: Test concurrent updates on bpf_task_storage_busy (diff) | |
download | wireguard-linux-23d86c8e02e55e511cd37e7ef2280a0030750954.tar.xz wireguard-linux-23d86c8e02e55e511cd37e7ef2280a0030750954.zip |
Merge branch 'Use this_cpu_xxx for preemption-safety'
Hou Tao says:
====================
From: Hou Tao <houtao1@huawei.com>
Hi,
The patchset aims to make the update of per-cpu prog->active and per-cpu
bpf_task_storage_busy being preemption-safe. The problem is on same
architectures (e.g. arm64), __this_cpu_{inc|dec|inc_return} are neither
preemption-safe nor IRQ-safe, so under fully preemptible kernel the
concurrent updates on these per-cpu variables may be interleaved and the
final values of these variables may be not zero.
Patch 1 & 2 use the preemption-safe per-cpu helpers to manipulate
prog->active and bpf_task_storage_busy. Patch 3 & 4 add a test case in
map_tests to show the concurrent updates on the per-cpu
bpf_task_storage_busy by using __this_cpu_{inc|dec} are not atomic.
Comments are always welcome.
Regards,
Tao
Change Log:
v2:
* Patch 1: update commit message to indicate the problem is only
possible for fully preemptible kernel
* Patch 2: a new patch which fixes the problem for prog->active
* Patch 3 & 4: move it to test_maps and make it depend on CONFIG_PREEMPT
v1: https://lore.kernel.org/bpf/20220829142752.330094-1-houtao@huaweicloud.com/
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-rw-r--r-- | kernel/bpf/bpf_local_storage.c | 4 | ||||
-rw-r--r-- | kernel/bpf/bpf_task_storage.c | 8 | ||||
-rw-r--r-- | kernel/bpf/trampoline.c | 8 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/map_tests/task_storage_map.c | 122 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c | 10 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/test_local_storage.c | 10 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c | 39 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/task_local_storage_helpers.h | 18 |
8 files changed, 191 insertions, 28 deletions
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 4ee2e7286c23..802fc15b0d73 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -555,11 +555,11 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap, struct bpf_local_storage_elem, map_node))) { if (busy_counter) { migrate_disable(); - __this_cpu_inc(*busy_counter); + this_cpu_inc(*busy_counter); } bpf_selem_unlink(selem, false); if (busy_counter) { - __this_cpu_dec(*busy_counter); + this_cpu_dec(*busy_counter); migrate_enable(); } cond_resched_rcu(); diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index e9014dc62682..6f290623347e 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -26,20 +26,20 @@ static DEFINE_PER_CPU(int, bpf_task_storage_busy); static void bpf_task_storage_lock(void) { migrate_disable(); - __this_cpu_inc(bpf_task_storage_busy); + this_cpu_inc(bpf_task_storage_busy); } static void bpf_task_storage_unlock(void) { - __this_cpu_dec(bpf_task_storage_busy); + this_cpu_dec(bpf_task_storage_busy); migrate_enable(); } static bool bpf_task_storage_trylock(void) { migrate_disable(); - if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) { - __this_cpu_dec(bpf_task_storage_busy); + if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { + this_cpu_dec(bpf_task_storage_busy); migrate_enable(); return false; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ff87e38af8a7..ad76940b02cc 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -895,7 +895,7 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_tramp_run_ctx *ru run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); - if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { + if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { inc_misses_counter(prog); return 0; } @@ -930,7 +930,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_ bpf_reset_run_ctx(run_ctx->saved_run_ctx); update_prog_stats(prog, start); - __this_cpu_dec(*(prog->active)); + this_cpu_dec(*(prog->active)); migrate_enable(); rcu_read_unlock(); } @@ -966,7 +966,7 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_r migrate_disable(); might_fault(); - if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { + if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) { inc_misses_counter(prog); return 0; } @@ -982,7 +982,7 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, bpf_reset_run_ctx(run_ctx->saved_run_ctx); update_prog_stats(prog, start); - __this_cpu_dec(*(prog->active)); + this_cpu_dec(*(prog->active)); migrate_enable(); rcu_read_unlock_trace(); } diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c new file mode 100644 index 000000000000..1adc9c292eb2 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "test_maps.h" +#include "task_local_storage_helpers.h" +#include "read_bpf_task_storage_busy.skel.h" + +struct lookup_ctx { + bool start; + bool stop; + int pid_fd; + int map_fd; + int loop; +}; + +static void *lookup_fn(void *arg) +{ + struct lookup_ctx *ctx = arg; + long value; + int i = 0; + + while (!ctx->start) + usleep(1); + + while (!ctx->stop && i++ < ctx->loop) + bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value); + return NULL; +} + +static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr) +{ + unsigned int i; + + ctx->stop = true; + ctx->start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +void test_task_storage_map_stress_lookup(void) +{ +#define MAX_NR_THREAD 4096 + unsigned int i, nr = 256, loop = 8192, cpu = 0; + struct read_bpf_task_storage_busy *skel; + pthread_t tids[MAX_NR_THREAD]; + struct lookup_ctx ctx; + cpu_set_t old, new; + const char *cfg; + int err; + + cfg = getenv("TASK_STORAGE_MAP_NR_THREAD"); + if (cfg) { + nr = atoi(cfg); + if (nr > MAX_NR_THREAD) + nr = MAX_NR_THREAD; + } + cfg = getenv("TASK_STORAGE_MAP_NR_LOOP"); + if (cfg) + loop = atoi(cfg); + cfg = getenv("TASK_STORAGE_MAP_PIN_CPU"); + if (cfg) + cpu = atoi(cfg); + + skel = read_bpf_task_storage_busy__open_and_load(); + err = libbpf_get_error(skel); + CHECK(err, "open_and_load", "error %d\n", err); + + /* Only for a fully preemptible kernel */ + if (!skel->kconfig->CONFIG_PREEMPT) + return; + + /* Save the old affinity setting */ + sched_getaffinity(getpid(), sizeof(old), &old); + + /* Pinned on a specific CPU */ + CPU_ZERO(&new); + CPU_SET(cpu, &new); + sched_setaffinity(getpid(), sizeof(new), &new); + + ctx.start = false; + ctx.stop = false; + ctx.pid_fd = sys_pidfd_open(getpid(), 0); + ctx.map_fd = bpf_map__fd(skel->maps.task); + ctx.loop = loop; + for (i = 0; i < nr; i++) { + err = pthread_create(&tids[i], NULL, lookup_fn, &ctx); + if (err) { + abort_lookup(&ctx, tids, i); + CHECK(err, "pthread_create", "error %d\n", err); + goto out; + } + } + + ctx.start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); + + skel->bss->pid = getpid(); + err = read_bpf_task_storage_busy__attach(skel); + CHECK(err, "attach", "error %d\n", err); + + /* Trigger program */ + syscall(SYS_gettid); + skel->bss->pid = 0; + + CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); +out: + read_bpf_task_storage_busy__destroy(skel); + /* Restore affinity setting */ + sched_setaffinity(getpid(), sizeof(old), &old); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c index 2559bb775762..a0054019e677 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c @@ -9,18 +9,10 @@ #include "bprm_opts.skel.h" #include "network_helpers.h" - -#ifndef __NR_pidfd_open -#define __NR_pidfd_open 434 -#endif +#include "task_local_storage_helpers.h" static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL }; -static inline int sys_pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - static int update_storage(int map_fd, int secureexec) { int task_fd, ret = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index 26ac26a88026..9c77cd6b1eaf 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -11,15 +11,7 @@ #include "local_storage.skel.h" #include "network_helpers.h" - -#ifndef __NR_pidfd_open -#define __NR_pidfd_open 434 -#endif - -static inline int sys_pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} +#include "task_local_storage_helpers.h" static unsigned int duration; diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c new file mode 100644 index 000000000000..a47bb0120719 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022. Huawei Technologies Co., Ltd */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern bool CONFIG_PREEMPT __kconfig __weak; +extern const int bpf_task_storage_busy __ksym; + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +int busy = 0; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, long); +} task SEC(".maps"); + +SEC("raw_tp/sys_enter") +int BPF_PROG(read_bpf_task_storage_busy) +{ + int *value; + int key; + + if (!CONFIG_PREEMPT) + return 0; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + value = bpf_this_cpu_ptr(&bpf_task_storage_busy); + if (value) + busy = *value; + + return 0; +} diff --git a/tools/testing/selftests/bpf/task_local_storage_helpers.h b/tools/testing/selftests/bpf/task_local_storage_helpers.h new file mode 100644 index 000000000000..711d5abb7d51 --- /dev/null +++ b/tools/testing/selftests/bpf/task_local_storage_helpers.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TASK_LOCAL_STORAGE_HELPER_H +#define __TASK_LOCAL_STORAGE_HELPER_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open 434 +#endif + +static inline int sys_pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +#endif |