diff options
Diffstat (limited to '')
26 files changed, 3128 insertions, 192 deletions
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c new file mode 100644 index 000000000000..e289dd1a14ee --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include <linux/log2.h> +#include <pthread.h> +#include "bench.h" +#include "bloom_filter_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + bool use_array_map; + bool use_hashmap; + bool hashmap_use_bloom; + bool count_false_hits; + + struct bloom_filter_bench *skel; + + int bloom_fd; + int hashmap_fd; + int array_map_fd; + + pthread_mutex_t map_done_mtx; + pthread_cond_t map_done_cv; + bool map_done; + bool map_prepare_err; + + __u32 next_map_idx; +} ctx = { + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, + .map_done_cv = PTHREAD_COND_INITIALIZER, +}; + +struct stat { + __u32 stats[3]; +}; + +static struct { + __u32 nr_entries; + __u8 nr_hash_funcs; + __u8 value_size; +} args = { + .nr_entries = 1000, + .nr_hash_funcs = 3, + .value_size = 8, +}; + +enum { + ARG_NR_ENTRIES = 3000, + ARG_NR_HASH_FUNCS = 3001, + ARG_VALUE_SIZE = 3002, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Set number of expected unique entries in the bloom filter"}, + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, + "Set number of hash functions in the bloom filter"}, + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set value size (in bytes) of bloom filter entries"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_NR_HASH_FUNCS: + ret = strtol(arg, NULL, 10); + if 
(ret < 1 || ret > 15) { + fprintf(stderr, + "The bloom filter must use 1 to 15 hash functions."); + argp_usage(state); + } + args.nr_hash_funcs = ret; + break; + case ARG_VALUE_SIZE: + ret = strtol(arg, NULL, 10); + if (ret < 2 || ret > 256) { + fprintf(stderr, + "Invalid value size. Must be between 2 and 256 bytes"); + argp_usage(state); + } + args.value_size = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bloom_map_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, + "The bloom filter benchmarks do not support consumer\n"); + exit(1); + } +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +static void *map_prepare_thread(void *arg) +{ + __u32 val_size, i; + void *val = NULL; + int err; + + val_size = args.value_size; + val = malloc(val_size); + if (!val) { + ctx.map_prepare_err = true; + goto done; + } + + while (true) { + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); + if (i > args.nr_entries) + break; + +again: + /* Populate hashmap, bloom filter map, and array map with the same + * random values + */ + err = syscall(__NR_getrandom, val, val_size, 0); + if (err != val_size) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to get random value: %d\n", -errno); + break; + } + + if (ctx.use_hashmap) { + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); + if (err) { + if (err != -EEXIST) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to hashmap: %d\n", + -errno); + break; + } + goto again; + } + } + + i--; + + if (ctx.use_array_map) { + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to array map: %d\n", 
-errno); + break; + } + } + + if (ctx.use_hashmap && !ctx.hashmap_use_bloom) + continue; + + err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, + "failed to add elem to bloom filter map: %d\n", -errno); + break; + } + } +done: + pthread_mutex_lock(&ctx.map_done_mtx); + ctx.map_done = true; + pthread_cond_signal(&ctx.map_done_cv); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (val) + free(val); + + return NULL; +} + +static void populate_maps(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + pthread_t map_thread; + int i, err, nr_rand_bytes; + + ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map); + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); + + for (i = 0; i < nr_cpus; i++) { + err = pthread_create(&map_thread, NULL, map_prepare_thread, + NULL); + if (err) { + fprintf(stderr, "failed to create pthread: %d\n", -errno); + exit(1); + } + } + + pthread_mutex_lock(&ctx.map_done_mtx); + while (!ctx.map_done) + pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (ctx.map_prepare_err) + exit(1); + + nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals, + ctx.skel->rodata->nr_rand_bytes, 0); + if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) { + fprintf(stderr, "failed to get random bytes\n"); + exit(1); + } +} + +static void check_args(void) +{ + if (args.value_size < 8) { + __u64 nr_unique_entries = 1ULL << (args.value_size * 8); + + if (args.nr_entries > nr_unique_entries) { + fprintf(stderr, + "Not enough unique values for the nr_entries requested\n"); + exit(1); + } + } +} + +static struct bloom_filter_bench *setup_skeleton(void) +{ + struct bloom_filter_bench *skel; + + check_args(); + + setup_libbpf(); + + skel = bloom_filter_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + 
skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom; + skel->rodata->count_false_hits = ctx.count_false_hits; + + /* Resize number of entries */ + bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries); + + /* Set value size */ + bpf_map__set_value_size(skel->maps.array_map, args.value_size); + + bpf_map__set_value_size(skel->maps.bloom_map, args.value_size); + + bpf_map__set_value_size(skel->maps.hashmap, args.value_size); + + /* For the hashmap, we use the value as the key as well */ + bpf_map__set_key_size(skel->maps.hashmap, args.value_size); + + skel->bss->value_size = args.value_size; + + /* Set number of hash functions */ + bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs); + + if (bloom_filter_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static void bloom_lookup_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void bloom_update_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_update); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void false_positive_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + ctx.count_false_hits = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_with_bloom_setup(void) +{ + struct 
bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_no_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; + static unsigned long last_hits, last_drops, last_false_hits; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int hit_key, drop_key, false_hit_key; + int i; + + hit_key = ctx.skel->rodata->hit_key; + drop_key = ctx.skel->rodata->drop_key; + false_hit_key = ctx.skel->rodata->false_hit_key; + + if (ctx.skel->bss->error != 0) { + fprintf(stderr, "error (%d) when searching the bloom filter\n", + ctx.skel->bss->error); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) { + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; + + total_hits += s->stats[hit_key]; + total_drops += s->stats[drop_key]; + total_false_hits += s->stats[false_hit_key]; + } + + res->hits = total_hits - last_hits; + res->drops = total_drops - last_drops; + res->false_hits = total_false_hits - last_false_hits; + + last_hits = total_hits; + last_drops = total_drops; + last_false_hits = total_false_hits; +} + +const struct bench bench_bloom_lookup = { + .name = "bloom-lookup", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = bloom_lookup_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_update = { + .name = "bloom-update", + .argp = 
&bench_bloom_map_argp, + .validate = validate, + .setup = bloom_update_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_false_positive = { + .name = "bloom-false-positive", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = false_positive_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = false_hits_report_progress, + .report_final = false_hits_report_final, +}; + +const struct bench bench_hashmap_without_bloom = { + .name = "hashmap-without-bloom", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = hashmap_no_bloom_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_hashmap_with_bloom = { + .name = "hashmap-with-bloom", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = hashmap_with_bloom_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c new file mode 100644 index 000000000000..2845edaba8db --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. 
*/ + +#include <argp.h> +#include "bench.h" +#include "crypto_bench.skel.h" + +#define MAX_CIPHER_LEN 32 +static char *input; +static struct crypto_ctx { + struct crypto_bench *skel; + int pfd; +} ctx; + +static struct crypto_args { + u32 crypto_len; + char *crypto_cipher; +} args = { + .crypto_len = 16, + .crypto_cipher = "ecb(aes)", +}; + +enum { + ARG_CRYPTO_LEN = 5000, + ARG_CRYPTO_CIPHER = 5001, +}; + +static const struct argp_option opts[] = { + { "crypto-len", ARG_CRYPTO_LEN, "CRYPTO_LEN", 0, + "Set the length of crypto buffer" }, + { "crypto-cipher", ARG_CRYPTO_CIPHER, "CRYPTO_CIPHER", 0, + "Set the cipher to use (default:ecb(aes))" }, + {}, +}; + +static error_t crypto_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_CRYPTO_LEN: + args.crypto_len = strtoul(arg, NULL, 10); + if (!args.crypto_len || + args.crypto_len > sizeof(ctx.skel->bss->dst)) { + fprintf(stderr, "Invalid crypto buffer len (limit %zu)\n", + sizeof(ctx.skel->bss->dst)); + argp_usage(state); + } + break; + case ARG_CRYPTO_CIPHER: + args.crypto_cipher = strdup(arg); + if (!strlen(args.crypto_cipher) || + strlen(args.crypto_cipher) > MAX_CIPHER_LEN) { + fprintf(stderr, "Invalid crypto cipher len (limit %d)\n", + MAX_CIPHER_LEN); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_crypto_argp = { + .options = opts, + .parser = crypto_parse_arg, +}; + +static void crypto_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "bpf crypto benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void crypto_setup(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + + int err, pfd; + size_t i, sz; + + sz = args.crypto_len; + if (!sz || sz > sizeof(ctx.skel->bss->dst)) { + fprintf(stderr, "invalid encrypt buffer size (source %zu, target %zu)\n", + sz, sizeof(ctx.skel->bss->dst)); + exit(1); + } + + setup_libbpf(); + + ctx.skel = crypto_bench__open(); + if (!ctx.skel) { + 
fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + snprintf(ctx.skel->bss->cipher, 128, "%s", args.crypto_cipher); + memcpy(ctx.skel->bss->key, "12345678testtest", 16); + ctx.skel->bss->key_len = 16; + ctx.skel->bss->authsize = 0; + + srandom(time(NULL)); + input = malloc(sz); + for (i = 0; i < sz - 1; i++) + input[i] = '1' + random() % 9; + input[sz - 1] = '\0'; + + ctx.skel->rodata->len = args.crypto_len; + + err = crypto_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + crypto_bench__destroy(ctx.skel); + exit(1); + } + + pfd = bpf_program__fd(ctx.skel->progs.crypto_setup); + if (pfd < 0) { + fprintf(stderr, "failed to get fd for setup prog\n"); + crypto_bench__destroy(ctx.skel); + exit(1); + } + + err = bpf_prog_test_run_opts(pfd, &opts); + if (err || ctx.skel->bss->status) { + fprintf(stderr, "failed to run setup prog: err %d, status %d\n", + err, ctx.skel->bss->status); + crypto_bench__destroy(ctx.skel); + exit(1); + } +} + +static void crypto_encrypt_setup(void) +{ + crypto_setup(); + ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_encrypt); +} + +static void crypto_decrypt_setup(void) +{ + crypto_setup(); + ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_decrypt); +} + +static void crypto_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void *crypto_producer(void *unused) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .repeat = 64, + .data_in = input, + .data_size_in = args.crypto_len, + ); + + while (true) + (void)bpf_prog_test_run_opts(ctx.pfd, &opts); + return NULL; +} + +const struct bench bench_crypto_encrypt = { + .name = "crypto-encrypt", + .argp = &bench_crypto_argp, + .validate = crypto_validate, + .setup = crypto_encrypt_setup, + .producer_thread = crypto_producer, + .measure = crypto_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_crypto_decrypt = { + .name = 
"crypto-decrypt", + .argp = &bench_crypto_argp, + .validate = crypto_validate, + .setup = crypto_decrypt_setup, + .producer_thread = crypto_producer, + .measure = crypto_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c new file mode 100644 index 000000000000..ee1dc12c5e5e --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "bench.h" +#include "bpf_hashmap_full_update_bench.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_full_update_bench *skel; +} ctx; + +#define MAX_LOOP_NUM 10000 + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd, i, max_entries; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_full_update_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = MAX_LOOP_NUM; + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + /* fill hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench); + for (i = 0; i < max_entries; i++) + bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); +} + +static void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = 
bpf_num_possible_cpus(); + int i; + + for (i = 0; i < nr_cpus; i++) { + u64 time = ctx.skel->bss->percpu_time[i]; + + if (!time) + continue; + + printf("%d:hash_map_full_perf %lld events per sec\n", + i, ctx.skel->bss->nr_loops * 1000000000ll / time); + } +} + +const struct bench bench_bpf_hashmap_full_update = { + .name = "bpf-hashmap-full-update", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c new file mode 100644 index 000000000000..279ff1b8b5b2 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include <sys/random.h> +#include <argp.h> +#include "bench.h" +#include "bpf_hashmap_lookup.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_lookup *skel; +} ctx; + +/* only available to kernel, so define it here */ +#define BPF_MAX_LOOPS (1<<23) + +#define MAX_KEY_SIZE 1024 /* the size of the key map */ + +static struct { + __u32 key_size; + __u32 map_flags; + __u32 max_entries; + __u32 nr_entries; + __u32 nr_loops; +} args = { + .key_size = 4, + .map_flags = 0, + .max_entries = 1000, + .nr_entries = 500, + .nr_loops = 1000000, +}; + +enum { + ARG_KEY_SIZE = 8001, + ARG_MAP_FLAGS, + ARG_MAX_ENTRIES, + ARG_NR_ENTRIES, + ARG_NR_LOOPS, +}; + +static const struct argp_option opts[] = { + { "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0, + "The hashmap key size (max 1024)"}, + { "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0, + "The hashmap flags passed to BPF_MAP_CREATE"}, + { "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0, + "The hashmap max entries"}, + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "The number of entries to insert/lookup"}, 
+ { "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0, + "The number of loops for the benchmark"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_KEY_SIZE: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > MAX_KEY_SIZE) { + fprintf(stderr, "invalid key_size"); + argp_usage(state); + } + args.key_size = ret; + break; + case ARG_MAP_FLAGS: + ret = strtol(arg, NULL, 0); + if (ret < 0 || ret > UINT_MAX) { + fprintf(stderr, "invalid map_flags"); + argp_usage(state); + } + args.map_flags = ret; + break; + case ARG_MAX_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid max_entries"); + argp_usage(state); + } + args.max_entries = ret; + break; + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_entries"); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_NR_LOOPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > BPF_MAX_LOOPS) { + fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n", + ret, BPF_MAX_LOOPS); + argp_usage(state); + } + args.nr_loops = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_hashmap_lookup_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } + + if (args.nr_entries > args.max_entries) { + fprintf(stderr, "args.nr_entries is too big! 
(max %u, got %u)\n", + args.max_entries, args.nr_entries); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static inline void patch_key(u32 i, u32 *key) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + *key = i + 1; +#else + *key = __builtin_bswap32(i + 1); +#endif + /* the rest of key is random */ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd; + int ret; + int i; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_lookup__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries); + bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size); + bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8); + bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags); + + ctx.skel->bss->nr_entries = args.nr_entries; + ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries; + + if (args.key_size > 4) { + for (i = 1; i < args.key_size/4; i++) + ctx.skel->bss->key[i] = 2654435761 * i; + } + + ret = bpf_hashmap_lookup__load(ctx.skel); + if (ret) { + bpf_hashmap_lookup__destroy(ctx.skel); + fprintf(stderr, "failed to load map: %s", strerror(-ret)); + exit(1); + } + + /* fill in the hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + for (u64 i = 0; i < args.nr_entries; i++) { + patch_key(i, ctx.skel->bss->key); + bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static inline double events_from_time(u64 time) +{ + if (time) + return args.nr_loops * 1000000000llu / time / 1000000.0L; + + return 0; +} + +static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 
*mean_time) +{ + int i, n = 0; + + *events_mean = 0; + *events_stddev = 0; + *mean_time = 0; + + for (i = 0; i < 32; i++) { + if (!times[i]) + break; + *mean_time += times[i]; + *events_mean += events_from_time(times[i]); + n += 1; + } + if (!n) + return 0; + + *mean_time /= n; + *events_mean /= n; + + if (n > 1) { + for (i = 0; i < n; i++) { + double events_i = *events_mean - events_from_time(times[i]); + *events_stddev += events_i * events_i / (n - 1); + } + *events_stddev = sqrt(*events_stddev); + } + + return n; +} + +static void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + double events_mean, events_stddev; + u64 mean_time; + int i, n; + + for (i = 0; i < nr_cpus; i++) { + n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean, + &events_stddev, &mean_time); + if (n == 0) + continue; + + if (env.quiet) { + /* we expect only one cpu to be present */ + if (env.affinity) + printf("%.3lf\n", events_mean); + else + printf("cpu%02d %.3lf\n", i, events_mean); + } else { + printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec" + " (approximated from %d samples of ~%lums)\n", + i, events_mean, 2*events_stddev, + n, mean_time / 1000000); + } + } +} + +const struct bench bench_bpf_hashmap_lookup = { + .name = "bpf-hashmap-lookup", + .argp = &bench_hashmap_lookup_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c new file mode 100644 index 000000000000..a705cfb2bccc --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include "bench.h" +#include "bpf_loop_bench.skel.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct 
bpf_loop_bench *skel; +} ctx; + +static struct { + __u32 nr_loops; +} args = { + .nr_loops = 10, +}; + +enum { + ARG_NR_LOOPS = 4000, +}; + +static const struct argp_option opts[] = { + { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0, + "Set number of loops for the bpf_loop helper"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_LOOPS: + args.nr_loops = strtol(arg, NULL, 10); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bpf_loop_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) + /* trigger the bpf program */ + syscall(__NR_getpgid); + + return NULL; +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void setup(void) +{ + struct bpf_link *link; + + setup_libbpf(); + + ctx.skel = bpf_loop_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = args.nr_loops; +} + +const struct bench bench_bpf_loop = { + .name = "bpf-loop", + .argp = &bench_bpf_loop_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c index befba7a82643..ba89ed3936b7 100644 --- a/tools/testing/selftests/bpf/benchs/bench_count.c +++ b/tools/testing/selftests/bpf/benchs/bench_count.c @@ -18,11 +18,6 @@ static void 
*count_global_producer(void *input) return NULL; } -static void *count_global_consumer(void *input) -{ - return NULL; -} - static void count_global_measure(struct bench_res *res) { struct count_global_ctx *ctx = &count_global_ctx; @@ -36,11 +31,11 @@ static struct count_local_ctx { struct counter *hits; } count_local_ctx; -static void count_local_setup() +static void count_local_setup(void) { struct count_local_ctx *ctx = &count_local_ctx; - ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits)); + ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits)); if (!ctx->hits) exit(1); } @@ -56,11 +51,6 @@ static void *count_local_producer(void *input) return NULL; } -static void *count_local_consumer(void *input) -{ - return NULL; -} - static void count_local_measure(struct bench_res *res) { struct count_local_ctx *ctx = &count_local_ctx; @@ -74,7 +64,6 @@ static void count_local_measure(struct bench_res *res) const struct bench bench_count_global = { .name = "count-global", .producer_thread = count_global_producer, - .consumer_thread = count_global_consumer, .measure = count_global_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -84,7 +73,6 @@ const struct bench bench_count_local = { .name = "count-local", .setup = count_local_setup, .producer_thread = count_local_producer, - .consumer_thread = count_local_consumer, .measure = count_local_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c new file mode 100644 index 000000000000..926ee822143e --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. 
Huawei Technologies Co., Ltd */ +#include <argp.h> +#include <stdbool.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <fcntl.h> + +#include "bench.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" +#include "htab_mem_bench.skel.h" + +struct htab_mem_use_case { + const char *name; + const char **progs; + /* Do synchronization between addition thread and deletion thread */ + bool need_sync; +}; + +static struct htab_mem_ctx { + const struct htab_mem_use_case *uc; + struct htab_mem_bench *skel; + pthread_barrier_t *notify; + int fd; +} ctx; + +const char *ow_progs[] = {"overwrite", NULL}; +const char *batch_progs[] = {"batch_add_batch_del", NULL}; +const char *add_del_progs[] = {"add_only", "del_only", NULL}; +const static struct htab_mem_use_case use_cases[] = { + { .name = "overwrite", .progs = ow_progs }, + { .name = "batch_add_batch_del", .progs = batch_progs }, + { .name = "add_del_on_diff_cpu", .progs = add_del_progs, .need_sync = true }, +}; + +static struct htab_mem_args { + u32 value_size; + const char *use_case; + bool preallocated; +} args = { + .value_size = 8, + .use_case = "overwrite", + .preallocated = false, +}; + +enum { + ARG_VALUE_SIZE = 10000, + ARG_USE_CASE = 10001, + ARG_PREALLOCATED = 10002, +}; + +static const struct argp_option opts[] = { + { "value-size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set the value size of hash map (default 8)" }, + { "use-case", ARG_USE_CASE, "USE_CASE", 0, + "Set the use case of hash map: overwrite|batch_add_batch_del|add_del_on_diff_cpu" }, + { "preallocated", ARG_PREALLOCATED, NULL, 0, "use preallocated hash map" }, + {}, +}; + +static error_t htab_mem_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_VALUE_SIZE: + args.value_size = strtoul(arg, NULL, 10); + if (args.value_size > 4096) { + fprintf(stderr, "too big value size %u\n", args.value_size); + argp_usage(state); + } + break; + case ARG_USE_CASE: + 
args.use_case = strdup(arg); + if (!args.use_case) { + fprintf(stderr, "no mem for use-case\n"); + argp_usage(state); + } + break; + case ARG_PREALLOCATED: + args.preallocated = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_htab_mem_argp = { + .options = opts, + .parser = htab_mem_parse_arg, +}; + +static void htab_mem_validate(void) +{ + if (!strcmp(use_cases[2].name, args.use_case) && env.producer_cnt % 2) { + fprintf(stderr, "%s needs an even number of producers\n", args.use_case); + exit(1); + } +} + +static int htab_mem_bench_init_barriers(void) +{ + pthread_barrier_t *barriers; + unsigned int i, nr; + + if (!ctx.uc->need_sync) + return 0; + + nr = (env.producer_cnt + 1) / 2; + barriers = calloc(nr, sizeof(*barriers)); + if (!barriers) + return -1; + + /* Used for synchronization between two threads */ + for (i = 0; i < nr; i++) + pthread_barrier_init(&barriers[i], NULL, 2); + + ctx.notify = barriers; + return 0; +} + +static void htab_mem_bench_exit_barriers(void) +{ + unsigned int i, nr; + + if (!ctx.notify) + return; + + nr = (env.producer_cnt + 1) / 2; + for (i = 0; i < nr; i++) + pthread_barrier_destroy(&ctx.notify[i]); + free(ctx.notify); +} + +static const struct htab_mem_use_case *htab_mem_find_use_case_or_exit(const char *name) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(use_cases); i++) { + if (!strcmp(name, use_cases[i].name)) + return &use_cases[i]; + } + + fprintf(stderr, "no such use-case: %s\n", name); + fprintf(stderr, "available use case:"); + for (i = 0; i < ARRAY_SIZE(use_cases); i++) + fprintf(stderr, " %s", use_cases[i].name); + fprintf(stderr, "\n"); + exit(1); +} + +static void htab_mem_setup(void) +{ + struct bpf_map *map; + const char **names; + int err; + + setup_libbpf(); + + ctx.uc = htab_mem_find_use_case_or_exit(args.use_case); + err = htab_mem_bench_init_barriers(); + if (err) { + fprintf(stderr, "failed to init barrier\n"); + exit(1); + } + + ctx.fd = 
cgroup_setup_and_join("/htab_mem"); + if (ctx.fd < 0) + goto cleanup; + + ctx.skel = htab_mem_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + goto cleanup; + } + + map = ctx.skel->maps.htab; + bpf_map__set_value_size(map, args.value_size); + /* Ensure that different CPUs can operate on different subset */ + bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus)); + if (args.preallocated) + bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC); + + names = ctx.uc->progs; + while (*names) { + struct bpf_program *prog; + + prog = bpf_object__find_program_by_name(ctx.skel->obj, *names); + if (!prog) { + fprintf(stderr, "no such program %s\n", *names); + goto cleanup; + } + bpf_program__set_autoload(prog, true); + names++; + } + ctx.skel->bss->nr_thread = env.producer_cnt; + + err = htab_mem_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + goto cleanup; + } + err = htab_mem_bench__attach(ctx.skel); + if (err) { + fprintf(stderr, "failed to attach skeleton\n"); + goto cleanup; + } + return; + +cleanup: + htab_mem_bench__destroy(ctx.skel); + htab_mem_bench_exit_barriers(); + if (ctx.fd >= 0) { + close(ctx.fd); + cleanup_cgroup_environment(); + } + exit(1); +} + +static void htab_mem_add_fn(pthread_barrier_t *notify) +{ + while (true) { + /* Do addition */ + (void)syscall(__NR_getpgid, 0); + /* Notify deletion thread to do deletion */ + pthread_barrier_wait(notify); + /* Wait for deletion to complete */ + pthread_barrier_wait(notify); + } +} + +static void htab_mem_delete_fn(pthread_barrier_t *notify) +{ + while (true) { + /* Wait for addition to complete */ + pthread_barrier_wait(notify); + /* Do deletion */ + (void)syscall(__NR_getppid); + /* Notify addition thread to do addition */ + pthread_barrier_wait(notify); + } +} + +static void *htab_mem_producer(void *arg) +{ + pthread_barrier_t *notify; + int seq; + + if (!ctx.uc->need_sync) { + while (true) + 
(void)syscall(__NR_getpgid, 0); + return NULL; + } + + seq = (long)arg; + notify = &ctx.notify[seq / 2]; + if (seq & 1) + htab_mem_delete_fn(notify); + else + htab_mem_add_fn(notify); + return NULL; +} + +/* Read the cgroup file @name, looked up relative to ctx.fd, and store its + * content, parsed as an integer by strtoull(), in @value. @value is set to 0 + * when the file can not be opened or read. + */ +static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value) +{ + char buf[32]; + ssize_t got; + int fd; + + fd = openat(ctx.fd, name, O_RDONLY); + if (fd < 0) { + /* cgroup v1 ? */ + fprintf(stderr, "no %s\n", name); + *value = 0; + return; + } + + got = read(fd, buf, sizeof(buf) - 1); + /* The content is in buf now, so close fd before any return path */ + close(fd); + if (got <= 0) { + *value = 0; + return; + } + buf[got] = 0; + + *value = strtoull(buf, NULL, 0); +} + +/* Collect the per-producer operation count since the previous measurement and + * the value of memory.current (stored in res->gp_ct). + */ +static void htab_mem_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->op_cnt, 0) / env.producer_cnt; + htab_mem_read_mem_cgrp_file("memory.current", &res->gp_ct); +} + +/* Print the per-producer operation rate (k/s) and memory usage (MiB) of one iteration */ +static void htab_mem_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double loop, mem; + + loop = res->hits / 1000.0 / (delta_ns / 1000000000.0); + mem = res->gp_ct / 1048576.0; + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + printf("per-prod-op %7.2lfk/s, memory usage %7.2lfMiB\n", loop, mem); +} + +static void htab_mem_report_final(struct bench_res res[], int res_cnt) +{ + double mem_mean = 0.0, mem_stddev = 0.0; + double loop_mean = 0.0, loop_stddev = 0.0; + unsigned long peak_mem; + int i; + + for (i = 0; i < res_cnt; i++) { + loop_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); + mem_mean += res[i].gp_ct / 1048576.0 / (0.0 + res_cnt); + } + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + loop_stddev += (loop_mean - res[i].hits / 1000.0) * + (loop_mean - res[i].hits / 1000.0) / + (res_cnt - 1.0); + mem_stddev += (mem_mean - res[i].gp_ct / 1048576.0) * + (mem_mean - res[i].gp_ct / 1048576.0) / + (res_cnt - 1.0); + } + loop_stddev = sqrt(loop_stddev); + mem_stddev = sqrt(mem_stddev); + } + + htab_mem_read_mem_cgrp_file("memory.peak", &peak_mem); + printf("Summary: per-prod-op %7.2lf \u00B1 
%7.2lfk/s, memory usage %7.2lf \u00B1 %7.2lfMiB," + " peak memory usage %7.2lfMiB\n", + loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0); + + close(ctx.fd); + cleanup_cgroup_environment(); +} + +const struct bench bench_htab_mem = { + .name = "htab-mem", + .argp = &bench_htab_mem_argp, + .validate = htab_mem_validate, + .setup = htab_mem_setup, + .producer_thread = htab_mem_producer, + .measure = htab_mem_measure, + .report_progress = htab_mem_report_progress, + .report_final = htab_mem_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c new file mode 100644 index 000000000000..452499428ceb --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <argp.h> +#include <linux/btf.h> + +#include "local_storage_bench.skel.h" +#include "bench.h" + +#include <test_btf.h> + +static struct { + __u32 nr_maps; + __u32 hashmap_nr_keys_used; +} args = { + .nr_maps = 1000, + .hashmap_nr_keys_used = 1000, +}; + +enum { + ARG_NR_MAPS = 6000, + ARG_HASHMAP_NR_KEYS_USED = 6001, +}; + +static const struct argp_option opts[] = { + { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0, + "Set number of local_storage maps"}, + { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS", + 0, "When doing hashmap test, set number of hashmap keys test uses"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_MAPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_maps"); + argp_usage(state); + } + args.nr_maps = ret; + break; + case ARG_HASHMAP_NR_KEYS_USED: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid hashmap_nr_keys_used"); + argp_usage(state); + } + 
args.hashmap_nr_keys_used = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Keep in sync w/ array of maps in bpf */ +#define MAX_NR_MAPS 1000 +/* keep in sync w/ same define in bpf */ +#define HASHMAP_SZ 4194304 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } + + if (args.nr_maps > MAX_NR_MAPS) { + fprintf(stderr, "nr_maps must be <= 1000\n"); + exit(1); + } + + if (args.hashmap_nr_keys_used > HASHMAP_SZ) { + fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ); + exit(1); + } +} + +static struct { + struct local_storage_bench *skel; + void *bpf_obj; + struct bpf_map *array_of_maps; +} ctx; + +static void prepopulate_hashmap(int fd) +{ + int i, key, val; + + /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so + * populate the hashmap for a similar comparison + */ + for (i = 0; i < HASHMAP_SZ; i++) { + key = val = i; + if (bpf_map_update_elem(fd, &key, &val, 0)) { + fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key); + exit(1); + } + } +} + +static void __setup(struct bpf_program *prog, bool hashmap) +{ + struct bpf_map *inner_map; + int i, fd, mim_fd, err; + + LIBBPF_OPTS(bpf_map_create_opts, create_opts); + + if (!hashmap) + create_opts.map_flags = BPF_F_NO_PREALLOC; + + ctx.skel->rodata->num_maps = args.nr_maps; + ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used; + inner_map = bpf_map__inner_map(ctx.array_of_maps); + create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map); + create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map); + + err = local_storage_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "Error loading skeleton\n"); + goto err_out; 
+ } + + create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj); + + mim_fd = bpf_map__fd(ctx.array_of_maps); + if (mim_fd < 0) { + fprintf(stderr, "Error getting map_in_map fd\n"); + goto err_out; + } + + for (i = 0; i < args.nr_maps; i++) { + if (hashmap) + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), + sizeof(int), HASHMAP_SZ, &create_opts); + else + fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int), + sizeof(int), 0, &create_opts); + if (fd < 0) { + fprintf(stderr, "Error creating map %d: %d\n", i, fd); + goto err_out; + } + + if (hashmap) + prepopulate_hashmap(fd); + + err = bpf_map_update_elem(mim_fd, &i, &fd, 0); + if (err) { + fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i); + goto err_out; + } + } + + if (!bpf_program__attach(prog)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void hashmap_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_hash_maps; + skel->rodata->use_hashmap = 1; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, true); +} + +static void local_storage_cache_get_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, false); +} + +static void local_storage_cache_get_interleaved_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 1; + + __setup(skel->progs.get_local, false); +} + +static void measure(struct bench_res *res) +{ + res->hits = 
atomic_swap(&ctx.skel->bss->hits, 0); + res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0); +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +/* cache sequential and interleaved get benchs test local_storage get + * performance, specifically they demonstrate performance cliff of + * current list-plus-cache local_storage model. + * + * cache sequential get: call bpf_task_storage_get on n maps in order + * cache interleaved get: like "sequential get", but interleave 4 calls to the + * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal + * is to mimic environment where many progs are accessing their local_storage + * maps, with 'our' prog needing to access its map more often than others + */ +const struct bench bench_local_storage_cache_seq_get = { + .name = "local-storage-cache-seq-get", + .argp = &bench_local_storage_argp, + .validate = validate, + .setup = local_storage_cache_get_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_interleaved_get = { + .name = "local-storage-cache-int-get", + .argp = &bench_local_storage_argp, + .validate = validate, + .setup = local_storage_cache_get_interleaved_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_hashmap_control = { + .name = "local-storage-cache-hashmap-control", + .argp = &bench_local_storage_argp, + .validate = validate, + .setup = hashmap_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; diff --git 
a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c new file mode 100644 index 000000000000..e2ff8ea1cb79 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <pthread.h> +#include <argp.h> + +#include "bench.h" +#include "bench_local_storage_create.skel.h" + +struct thread { + int *fds; + pthread_t *pthds; + int *pthd_results; +}; + +static struct bench_local_storage_create *skel; +static struct thread *threads; +static long create_owner_errs; +static int storage_type = BPF_MAP_TYPE_SK_STORAGE; +static int batch_sz = 32; + +enum { + ARG_BATCH_SZ = 9000, + ARG_STORAGE_TYPE = 9001, +}; + +static const struct argp_option opts[] = { + { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0, + "The number of storage creations in each batch" }, + { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0, + "The type of local storage to test (socket or task)" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + int ret; + + switch (key) { + case ARG_BATCH_SZ: + ret = atoi(arg); + if (ret < 1) { + fprintf(stderr, "invalid batch-size\n"); + argp_usage(state); + } + batch_sz = ret; + break; + case ARG_STORAGE_TYPE: + if (!strcmp(arg, "task")) { + storage_type = BPF_MAP_TYPE_TASK_STORAGE; + } else if (!strcmp(arg, "socket")) { + storage_type = BPF_MAP_TYPE_SK_STORAGE; + } else { + fprintf(stderr, "invalid storage-type (socket or task)\n"); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_create_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, + "local-storage-create benchmark does not need 
consumer\n"); + exit(1); + } +} + +static void setup(void) +{ + int i; + + skel = bench_local_storage_create__open_and_load(); + if (!skel) { + fprintf(stderr, "error loading skel\n"); + exit(1); + } + + skel->bss->bench_pid = getpid(); + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { + if (!bpf_program__attach(skel->progs.socket_post_create)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + } else { + if (!bpf_program__attach(skel->progs.sched_process_fork)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + } + + if (!bpf_program__attach(skel->progs.kmalloc)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + + threads = calloc(env.producer_cnt, sizeof(*threads)); + + if (!threads) { + fprintf(stderr, "cannot alloc thread_res\n"); + exit(1); + } + + for (i = 0; i < env.producer_cnt; i++) { + struct thread *t = &threads[i]; + + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { + t->fds = malloc(batch_sz * sizeof(*t->fds)); + if (!t->fds) { + fprintf(stderr, "cannot alloc t->fds\n"); + exit(1); + } + } else { + t->pthds = malloc(batch_sz * sizeof(*t->pthds)); + if (!t->pthds) { + fprintf(stderr, "cannot alloc t->pthds\n"); + exit(1); + } + t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results)); + if (!t->pthd_results) { + fprintf(stderr, "cannot alloc t->pthd_results\n"); + exit(1); + } + } + } +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&skel->bss->create_cnts, 0); + res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0); +} + +static void *sk_producer(void *input) +{ + struct thread *t = &threads[(long)(input)]; + int *fds = t->fds; + int i; + + while (true) { + for (i = 0; i < batch_sz; i++) { + fds[i] = socket(AF_INET6, SOCK_DGRAM, 0); + if (fds[i] == -1) + atomic_inc(&create_owner_errs); + } + + for (i = 0; i < batch_sz; i++) { + if (fds[i] != -1) + close(fds[i]); + } + } + + return NULL; +} + +static void *thread_func(void *arg) +{ + return NULL; +} + +static 
void *task_producer(void *input) +{ + struct thread *t = &threads[(long)(input)]; + pthread_t *pthds = t->pthds; + int *pthd_results = t->pthd_results; + int i; + + while (true) { + for (i = 0; i < batch_sz; i++) { + pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL); + if (pthd_results[i]) + atomic_inc(&create_owner_errs); + } + + for (i = 0; i < batch_sz; i++) { + if (!pthd_results[i]) + pthread_join(pthds[i], NULL); + } + } + + return NULL; +} + +static void *producer(void *input) +{ + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) + return sk_producer(input); + else + return task_producer(input); +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double creates_per_sec, kmallocs_per_create; + + creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0); + kmallocs_per_create = (double)res->drops / res->hits; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + printf("creates %8.3lfk/s (%7.3lfk/prod), ", + creates_per_sec, creates_per_sec / env.producer_cnt); + printf("%3.2lf kmallocs/create\n", kmallocs_per_create); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + double creates_mean = 0.0, creates_stddev = 0.0; + long total_creates = 0, total_kmallocs = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); + total_creates += res[i].hits; + total_kmallocs += res[i].drops; + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + creates_stddev += (creates_mean - res[i].hits / 1000.0) * + (creates_mean - res[i].hits / 1000.0) / + (res_cnt - 1.0); + creates_stddev = sqrt(creates_stddev); + } + printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ", + creates_mean, creates_stddev, creates_mean / env.producer_cnt); + printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates); + if (create_owner_errs || skel->bss->create_errs) + printf("%s() errors %ld create_errs %ld\n", + 
storage_type == BPF_MAP_TYPE_SK_STORAGE ? + "socket" : "pthread_create", + create_owner_errs, + skel->bss->create_errs); +} + +/* Benchmark performance of creating bpf local storage */ +const struct bench bench_local_storage_create = { + .name = "local-storage-create", + .argp = &bench_local_storage_create_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c new file mode 100644 index 000000000000..edf0b00418c1 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <argp.h> + +#include <sys/prctl.h> +#include "local_storage_rcu_tasks_trace_bench.skel.h" +#include "bench.h" + +#include <signal.h> + +static struct { + __u32 nr_procs; + __u32 kthread_pid; +} args = { + .nr_procs = 1000, + .kthread_pid = 0, +}; + +enum { + ARG_NR_PROCS = 7000, + ARG_KTHREAD_PID = 7001, +}; + +static const struct argp_option opts[] = { + { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0, + "Set number of user processes to spin up"}, + { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, + "Pid of rcu_tasks_trace kthread for ticks tracking"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_PROCS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_procs\n"); + argp_usage(state); + } + args.nr_procs = ret; + break; + case ARG_KTHREAD_PID: + ret = strtol(arg, NULL, 10); + if (ret < 1) { + fprintf(stderr, "invalid kthread_pid\n"); + argp_usage(state); + } + args.kthread_pid = ret; + break; +break; + default: + return ARGP_ERR_UNKNOWN; + } 
+ + return 0; +} + +const struct argp bench_local_storage_rcu_tasks_trace_argp = { + .options = opts, + .parser = parse_arg, +}; + +#define MAX_SLEEP_PROCS 150000 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } + + if (args.nr_procs > MAX_SLEEP_PROCS) { + fprintf(stderr, "benchmark supports up to %u sleeper procs!\n", + MAX_SLEEP_PROCS); + exit(1); + } +} + +static long kthread_pid_ticks(void) +{ + char procfs_path[100]; + long stime; + FILE *f; + + if (!args.kthread_pid) + return -1; + + sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid); + f = fopen(procfs_path, "r"); + if (!f) { + fprintf(stderr, "couldn't open %s, exiting\n", procfs_path); + goto err_out; + } + if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) { + fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path); + goto err_out; + } + fclose(f); + return stime; + +err_out: + if (f) + fclose(f); + exit(1); + return 0; +} + +static struct { + struct local_storage_rcu_tasks_trace_bench *skel; + long prev_kthread_stime; +} ctx; + +static void sleep_and_loop(void) +{ + while (true) { + sleep(rand() % 4); + syscall(__NR_getpgid); + } +} + +static void local_storage_tasks_trace_setup(void) +{ + int i, err, forkret, runner_pid; + + runner_pid = getpid(); + + for (i = 0; i < args.nr_procs; i++) { + forkret = fork(); + if (forkret < 0) { + fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i, + args.nr_procs); + goto err_out; + } + + if (!forkret) { + err = prctl(PR_SET_PDEATHSIG, SIGKILL); + if (err < 0) { + fprintf(stderr, "prctl failed with err %d, exiting\n", errno); + goto err_out; + } + + if (getppid() != runner_pid) { + fprintf(stderr, "Runner died while spinning up procs, exiting\n"); + goto err_out; + } + sleep_and_loop(); + } + } + 
printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid); + + setup_libbpf(); + + ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "Error doing open_and_load, exiting\n"); + goto err_out; + } + + ctx.prev_kthread_stime = kthread_pid_ticks(); + + if (!bpf_program__attach(ctx.skel->progs.get_local)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.pregp_step)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.postgp)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void measure(struct bench_res *res) +{ + long ticks; + + res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0); + res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0); + ticks = kthread_pid_ticks(); + res->stime = ticks - ctx.prev_kthread_stime; + ctx.prev_kthread_stime = ticks; +} + +static void *producer(void *input) +{ + while (true) + syscall(__NR_getpgid); + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + if (ctx.skel->bss->unexpected) { + fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp)."); + fprintf(stderr, "Data can't be trusted, exiting\n"); + exit(1); + } + + if (env.quiet) + return; + + printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", + iter, res->gp_ns / (double)res->gp_ct); + printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n", + iter, res->stime / (double)res->gp_ct); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + struct basic_stats gp_stat; + + grace_period_latency_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY tasks_trace grace period latency"); + printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev); + grace_period_ticks_basic_stats(res, res_cnt, 
&gp_stat); + printf("SUMMARY ticks per tasks_trace grace period"); + printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev); +} + +/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use + * of RCU Tasks-Trace. + * + * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside + * from sleep() loop, and creating/destroying BPF task-local storage on wakeup. + * The number of forked tasks is configurable. + * + * exercising code paths which call call_rcu_tasks_trace while there are many + * thousands of tasks on the system should result in RCU Tasks-Trace having to + * do a noticeable amount of work. + * + * This should be observable by measuring rcu_tasks_trace_kthread CPU usage + * after the grace period has ended, or by measuring grace period latency. + * + * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step + * and rcu_tasks_trace_postgp functions to measure grace period latency and + * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks + */ +const struct bench bench_local_storage_tasks_trace = { + .name = "local-storage-tasks-trace", + .argp = &bench_local_storage_rcu_tasks_trace_argp, + .validate = validate, + .setup = local_storage_tasks_trace_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index a967674098ad..bf66893c7a33 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -11,14 +11,14 @@ static struct ctx { int fd; } ctx; -static void validate() +static void validate(void) { if (env.producer_cnt != 1) { fprintf(stderr, "benchmark doesn't support multi-producer!\n"); exit(1); } - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt 
!= 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -43,7 +43,7 @@ static void measure(struct bench_res *res) res->hits = atomic_swap(&ctx.hits.value, 0); } -static void setup_ctx() +static void setup_ctx(void) { setup_libbpf(); @@ -65,58 +65,52 @@ static void attach_bpf(struct bpf_program *prog) struct bpf_link *link; link = bpf_program__attach(prog); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } } -static void setup_base() +static void setup_base(void) { setup_ctx(); } -static void setup_kprobe() +static void setup_kprobe(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog1); } -static void setup_kretprobe() +static void setup_kretprobe(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog2); } -static void setup_rawtp() +static void setup_rawtp(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog3); } -static void setup_fentry() +static void setup_fentry(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog4); } -static void setup_fexit() +static void setup_fexit(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog5); } -static void *consumer(void *input) -{ - return NULL; -} - const struct bench bench_rename_base = { .name = "rename-base", .validate = validate, .setup = setup_base, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -127,7 +121,6 @@ const struct bench bench_rename_kprobe = { .validate = validate, .setup = setup_kprobe, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -138,7 +131,6 @@ const struct bench bench_rename_kretprobe = { .validate = validate, .setup = setup_kretprobe, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = 
hits_drops_report_final, @@ -149,7 +141,6 @@ const struct bench bench_rename_rawtp = { .validate = validate, .setup = setup_rawtp, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -160,7 +151,6 @@ const struct bench bench_rename_fentry = { .validate = validate, .setup = setup_fentry, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -171,7 +161,6 @@ const struct bench bench_rename_fexit = { .validate = validate, .setup = setup_fexit, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index da87c7f31891..e1ee979e6acc 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -88,15 +88,15 @@ const struct argp bench_ringbufs_argp = { static struct counter buf_hits; -static inline void bufs_trigger_batch() +static inline void bufs_trigger_batch(void) { (void)syscall(__NR_getpgid); } -static void bufs_validate() +static void bufs_validate(void) { if (env.consumer_cnt != 1) { - fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n"); + fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n"); exit(1); } @@ -132,7 +132,7 @@ static void ringbuf_libbpf_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static struct ringbuf_bench *ringbuf_setup_skeleton() +static struct ringbuf_bench *ringbuf_setup_skeleton(void) { struct ringbuf_bench *skel; @@ -151,7 +151,7 @@ static struct ringbuf_bench *ringbuf_setup_skeleton() /* record data + header take 16 bytes */ 
skel->rodata->wakeup_data_size = args.sample_rate * 16; - bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz); + bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz); if (ringbuf_bench__load(skel)) { fprintf(stderr, "failed to load skeleton\n"); @@ -167,7 +167,7 @@ static int buf_process_sample(void *ctx, void *data, size_t len) return 0; } -static void ringbuf_libbpf_setup() +static void ringbuf_libbpf_setup(void) { struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; struct bpf_link *link; @@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup() } link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } @@ -223,7 +223,7 @@ static void ringbuf_custom_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static void ringbuf_custom_setup() +static void ringbuf_custom_setup(void) { struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; const size_t page_size = getpagesize(); @@ -271,7 +271,7 @@ static void ringbuf_custom_setup() } link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program\n"); exit(1); } @@ -319,7 +319,7 @@ static void ringbuf_custom_process_ring(struct ringbuf_custom *r) smp_store_release(r->consumer_pos, cons_pos); else break; - }; + } } static void *ringbuf_custom_consumer(void *input) @@ -352,7 +352,7 @@ static void perfbuf_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static struct perfbuf_bench *perfbuf_setup_skeleton() +static struct perfbuf_bench *perfbuf_setup_skeleton(void) { struct perfbuf_bench *skel; @@ -390,21 +390,16 @@ perfbuf_process_sample_raw(void *input_ctx, int cpu, return LIBBPF_PERF_EVENT_CONT; } -static void perfbuf_libbpf_setup() +static void perfbuf_libbpf_setup(void) { struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; struct perf_event_attr attr; - struct 
perf_buffer_raw_opts pb_opts = { - .event_cb = perfbuf_process_sample_raw, - .ctx = (void *)(long)0, - .attr = &attr, - }; struct bpf_link *link; ctx->skel = perfbuf_setup_skeleton(); memset(&attr, 0, sizeof(attr)); - attr.config = PERF_COUNT_SW_BPF_OUTPUT, + attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; /* notify only every Nth sample */ @@ -423,14 +418,15 @@ static void perfbuf_libbpf_setup() } ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf), - args.perfbuf_sz, &pb_opts); + args.perfbuf_sz, &attr, + perfbuf_process_sample_raw, NULL, NULL); if (!ctx->perfbuf) { fprintf(stderr, "failed to create perfbuf\n"); exit(1); } link = bpf_program__attach(ctx->skel->progs.bench_perfbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program\n"); exit(1); } @@ -522,6 +518,7 @@ static void *perfbuf_custom_consumer(void *input) const struct bench bench_rb_libbpf = { .name = "rb-libbpf", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = ringbuf_libbpf_setup, .producer_thread = bufs_sample_producer, @@ -533,6 +530,7 @@ const struct bench bench_rb_libbpf = { const struct bench bench_rb_custom = { .name = "rb-custom", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = ringbuf_custom_setup, .producer_thread = bufs_sample_producer, @@ -544,6 +542,7 @@ const struct bench bench_rb_custom = { const struct bench bench_pb_libbpf = { .name = "pb-libbpf", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = perfbuf_libbpf_setup, .producer_thread = bufs_sample_producer, @@ -555,6 +554,7 @@ const struct bench bench_pb_libbpf = { const struct bench bench_pb_custom = { .name = "pb-custom", + .argp = &bench_ringbufs_argp, .validate = bufs_validate, .setup = perfbuf_libbpf_setup, .producer_thread = bufs_sample_producer, diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c 
new file mode 100644 index 000000000000..a5e1428fd7a0 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <argp.h> +#include "bench.h" +#include "strncmp_bench.skel.h" + +static struct strncmp_ctx { + struct strncmp_bench *skel; +} ctx; + +static struct strncmp_args { + u32 cmp_str_len; +} args = { + .cmp_str_len = 32, +}; + +enum { + ARG_CMP_STR_LEN = 5000, +}; + +static const struct argp_option opts[] = { + { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0, + "Set the length of compared string" }, + {}, +}; + +static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_CMP_STR_LEN: + args.cmp_str_len = strtoul(arg, NULL, 10); + if (!args.cmp_str_len || + args.cmp_str_len >= sizeof(ctx.skel->bss->str)) { + fprintf(stderr, "Invalid cmp str len (limit %zu)\n", + sizeof(ctx.skel->bss->str)); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_strncmp_argp = { + .options = opts, + .parser = strncmp_parse_arg, +}; + +static void strncmp_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "strncmp benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void strncmp_setup(void) +{ + int err; + char *target; + size_t i, sz; + + sz = sizeof(ctx.skel->rodata->target); + if (!sz || sz < sizeof(ctx.skel->bss->str)) { + fprintf(stderr, "invalid string size (target %zu, src %zu)\n", + sz, sizeof(ctx.skel->bss->str)); + exit(1); + } + + setup_libbpf(); + + ctx.skel = strncmp_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + srandom(time(NULL)); + target = ctx.skel->rodata->target; + for (i = 0; i < sz - 1; i++) + target[i] = '1' + random() % 9; + target[sz - 1] = '\0'; + + ctx.skel->rodata->cmp_str_len = args.cmp_str_len; + + 
memcpy(ctx.skel->bss->str, target, args.cmp_str_len); + ctx.skel->bss->str[args.cmp_str_len] = '\0'; + /* Make bss->str < rodata->target */ + ctx.skel->bss->str[args.cmp_str_len - 1] -= 1; + + err = strncmp_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + strncmp_bench__destroy(ctx.skel); + exit(1); + } +} + +static void strncmp_attach_prog(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void strncmp_no_helper_setup(void) +{ + strncmp_setup(); + strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper); +} + +static void strncmp_helper_setup(void) +{ + strncmp_setup(); + strncmp_attach_prog(ctx.skel->progs.strncmp_helper); +} + +static void *strncmp_producer(void *ctx) +{ + while (true) + (void)syscall(__NR_getpgid); + return NULL; +} + +static void strncmp_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +const struct bench bench_strncmp_no_helper = { + .name = "strncmp-no-helper", + .argp = &bench_strncmp_argp, + .validate = strncmp_validate, + .setup = strncmp_no_helper_setup, + .producer_thread = strncmp_producer, + .measure = strncmp_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_strncmp_helper = { + .name = "strncmp-helper", + .argp = &bench_strncmp_argp, + .validate = strncmp_validate, + .setup = strncmp_helper_setup, + .producer_thread = strncmp_producer, + .measure = strncmp_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 2a0b6c9885a4..4b05539f167d 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -1,58 +1,161 @@ 
// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <argp.h> +#include <unistd.h> +#include <stdint.h> #include "bench.h" #include "trigger_bench.skel.h" +#include "trace_helpers.h" + +#define MAX_TRIG_BATCH_ITERS 1000 + +static struct { + __u32 batch_iters; +} args = { + .batch_iters = 100, +}; + +enum { + ARG_TRIG_BATCH_ITERS = 7000, +}; + +static const struct argp_option opts[] = { + { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, + "Number of in-kernel iterations per one driver test run"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_TRIG_BATCH_ITERS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { + fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", + 1, MAX_TRIG_BATCH_ITERS); + argp_usage(state); + } + args.batch_iters = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_trigger_batch_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* adjust slot shift in inc_counter() if changing */ +#define MAX_BUCKETS 256 + +#pragma GCC diagnostic ignored "-Wattributes" /* BPF triggering benchmarks */ static struct trigger_ctx { struct trigger_bench *skel; + bool usermode_counters; + int driver_prog_fd; } ctx; -static struct counter base_hits; +static struct counter base_hits[MAX_BUCKETS]; + +static __always_inline void inc_counter(struct counter *counters) +{ + static __thread int tid = 0; + unsigned slot; + + if (unlikely(tid == 0)) + tid = syscall(SYS_gettid); + + /* multiplicative hashing of the thread id, it's fast */ + slot = 2654435769U * tid; + slot >>= 24; -static void trigger_validate() + atomic_inc(&counters[slot].value); /* highest byte of the hash indexes the caller's bucket array */ +} + +static long sum_and_reset_counters(struct counter *counters) +{ + int i; + long sum = 0; + + for (i = 0; i < MAX_BUCKETS; i++) + sum += 
atomic_swap(&counters[i].value, 0); + return sum; +} + +static void trigger_validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } -static void *trigger_base_producer(void *input) +static void *trigger_producer(void *input) { - while (true) { - (void)syscall(__NR_getpgid); - atomic_inc(&base_hits.value); + if (ctx.usermode_counters) { + while (true) { + (void)syscall(__NR_getpgid); + inc_counter(base_hits); + } + } else { + while (true) + (void)syscall(__NR_getpgid); } return NULL; } -static void trigger_base_measure(struct bench_res *res) +static void *trigger_producer_batch(void *input) { - res->hits = atomic_swap(&base_hits.value, 0); -} + int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); -static void *trigger_producer(void *input) -{ while (true) - (void)syscall(__NR_getpgid); + bpf_prog_test_run_opts(fd, NULL); + return NULL; } static void trigger_measure(struct bench_res *res) { - res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + if (ctx.usermode_counters) + res->hits = sum_and_reset_counters(base_hits); + else + res->hits = sum_and_reset_counters(ctx.skel->bss->hits); } -static void setup_ctx() +static void setup_ctx(void) { setup_libbpf(); - ctx.skel = trigger_bench__open_and_load(); + ctx.skel = trigger_bench__open(); if (!ctx.skel) { fprintf(stderr, "failed to open skeleton\n"); exit(1); } + + /* default "driver" BPF program */ + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); + + ctx.skel->rodata->batch_iters = args.batch_iters; +} + +static void load_ctx(void) +{ + int err; + + err = trigger_bench__load(ctx.skel); /* expects setup_ctx() to have opened the skeleton */ + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } } static void attach_bpf(struct bpf_program *prog) @@ -60,125 +163,294 @@ static void attach_bpf(struct bpf_program *prog) struct bpf_link *link; link = 
bpf_program__attach(prog); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } } -static void trigger_tp_setup() +static void trigger_syscall_count_setup(void) { - setup_ctx(); - attach_bpf(ctx.skel->progs.bench_trigger_tp); + ctx.usermode_counters = true; } -static void trigger_rawtp_setup() +/* Batched, staying mostly in-kernel triggering setups */ +static void trigger_kernel_count_setup(void) { setup_ctx(); - attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); } -static void trigger_kprobe_setup() +static void trigger_kprobe_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kprobe); } -static void trigger_fentry_setup() +static void trigger_kretprobe_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); +} + +static void trigger_kprobe_multi_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); +} + +static void trigger_kretprobe_multi_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); +} + +static void trigger_fentry_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry); } -static void trigger_fentry_sleep_setup() +static void trigger_fexit_setup(void) { setup_ctx(); - 
attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fexit); } -static void trigger_fmodret_setup() +static void trigger_fmodret_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); attach_bpf(ctx.skel->progs.bench_trigger_fmodret); } -static void *trigger_consumer(void *input) +static void trigger_tp_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_tp); +} + +static void trigger_rawtp_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_rawtp); +} + +/* make sure call is not inlined and not avoided by compiler, so __weak and + * inline asm volatile in the body of the function + * + * There is a performance difference between uprobing at nop location vs other + * instructions. So use two different targets, one of which starts with nop + * and another doesn't. 
+ * + * GCC doesn't generate stack setup preamble for these functions due to them + * having no input arguments and doing nothing in the body. + */ +__nocf_check __weak void uprobe_target_nop(void) +{ + asm volatile ("nop"); +} + +__weak void opaque_noop_func(void) +{ +} + +__nocf_check __weak int uprobe_target_push(void) +{ + /* overhead of function call is negligible compared to uprobe + * triggering, so this shouldn't affect benchmark results much + */ + opaque_noop_func(); + return 1; +} + +__nocf_check __weak void uprobe_target_ret(void) { + asm volatile (""); +} + +static void *uprobe_producer_count(void *input) +{ + while (true) { + uprobe_target_nop(); + inc_counter(base_hits); + } return NULL; } -const struct bench bench_trig_base = { - .name = "trig-base", - .validate = trigger_validate, - .producer_thread = trigger_base_producer, - .consumer_thread = trigger_consumer, - .measure = trigger_base_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; +static void *uprobe_producer_nop(void *input) +{ + while (true) + uprobe_target_nop(); + return NULL; +} -const struct bench bench_trig_tp = { - .name = "trig-tp", - .validate = trigger_validate, - .setup = trigger_tp_setup, - .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; +static void *uprobe_producer_push(void *input) +{ + while (true) + uprobe_target_push(); + return NULL; +} -const struct bench bench_trig_rawtp = { - .name = "trig-rawtp", - .validate = trigger_validate, - .setup = trigger_rawtp_setup, - .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; +static void *uprobe_producer_ret(void *input) +{ + while (true) + uprobe_target_ret(); + return NULL; +} 
-const struct bench bench_trig_kprobe = { - .name = "trig-kprobe", - .validate = trigger_validate, - .setup = trigger_kprobe_setup, - .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; +static void usetup(bool use_retprobe, void *target_addr) +{ + size_t uprobe_offset; + struct bpf_link *link; + int err; -const struct bench bench_trig_fentry = { - .name = "trig-fentry", - .validate = trigger_validate, - .setup = trigger_fentry_setup, - .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; + setup_libbpf(); -const struct bench bench_trig_fentry_sleep = { - .name = "trig-fentry-sleep", - .validate = trigger_validate, - .setup = trigger_fentry_sleep_setup, - .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; + ctx.skel = trigger_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } -const struct bench bench_trig_fmodret = { - .name = "trig-fmodret", + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); + + err = trigger_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + uprobe_offset = get_uprobe_offset(target_addr); + link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, + use_retprobe, + -1 /* all PIDs */, + "/proc/self/exe", + uprobe_offset); + if (!link) { + fprintf(stderr, "failed to attach uprobe!\n"); + exit(1); + } + ctx.skel->links.bench_trigger_uprobe = link; +} + +static void usermode_count_setup(void) +{ + ctx.usermode_counters = true; +} + +static void uprobe_nop_setup(void) +{ + 
usetup(false, &uprobe_target_nop); +} + +static void uretprobe_nop_setup(void) +{ + usetup(true, &uprobe_target_nop); +} + +static void uprobe_push_setup(void) +{ + usetup(false, &uprobe_target_push); +} + +static void uretprobe_push_setup(void) +{ + usetup(true, &uprobe_target_push); +} + +static void uprobe_ret_setup(void) +{ + usetup(false, &uprobe_target_ret); +} + +static void uretprobe_ret_setup(void) +{ + usetup(true, &uprobe_target_ret); +} + +const struct bench bench_trig_syscall_count = { + .name = "trig-syscall-count", .validate = trigger_validate, - .setup = trigger_fmodret_setup, + .setup = trigger_syscall_count_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; + +/* batched (staying mostly in kernel) kprobe/fentry benchmarks */ +#define BENCH_TRIG_KERNEL(KIND, NAME) \ +const struct bench bench_trig_##KIND = { \ + .name = "trig-" NAME, \ + .setup = trigger_##KIND##_setup, \ + .producer_thread = trigger_producer_batch, \ + .measure = trigger_measure, \ + .report_progress = hits_drops_report_progress, \ + .report_final = hits_drops_report_final, \ + .argp = &bench_trigger_batch_argp, \ +} + +BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); +BENCH_TRIG_KERNEL(kprobe, "kprobe"); +BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); +BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); +BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); +BENCH_TRIG_KERNEL(fentry, "fentry"); +BENCH_TRIG_KERNEL(fexit, "fexit"); +BENCH_TRIG_KERNEL(fmodret, "fmodret"); +BENCH_TRIG_KERNEL(tp, "tp"); +BENCH_TRIG_KERNEL(rawtp, "rawtp"); + +/* uprobe benchmarks */ +#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ +const struct bench bench_trig_##KIND = { \ + .name = "trig-" NAME, \ + .validate = trigger_validate, \ + .setup = KIND##_setup, \ + .producer_thread = uprobe_producer_##PRODUCER, \ + .measure = trigger_measure, \ + 
.report_progress = hits_drops_report_progress, \ + .report_final = hits_drops_report_final, \ +} + +BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); +BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); +BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); +BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); +BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); +BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); +BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh new file mode 100755 index 000000000000..8ffd385ab2f4 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Bloom filter map" +for v in 2 4 8 16 40; do +for t in 1 4 8 12 16; do +for h in {1..10}; do +subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize "Lookups, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)" + printf "\t" + summarize "Updates, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)" + printf "\t" + summarize_percentage "False positive rate: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)" + done + printf "\n" +done +done +done + +header "Hashmap without bloom filter vs. 
hashmap with bloom filter (throughput, 8 threads)" +for v in 2 4 8 16 40; do +for h in {1..10}; do +subtitle "value_size: $v, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize_total "Hashmap without bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)" + printf "\t" + summarize_total "Hashmap with bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)" + done + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh new file mode 100755 index 000000000000..cd2efd3fdef3 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update) +printf "%s" "$summary" # benchmark output is data, never a format string +printf "\n" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh new file mode 100755 index 000000000000..d4f5f73b356b --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for t in 1 4 8 12 16; do +for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do +subtitle "nr_loops: $i, nr_threads: $t" + summarize_ops "bpf_loop: " \ + "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)" + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh new file mode 100755 index 
000000000000..9ff5832463a2 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +htab_mem() +{ + echo -n "per-prod-op: " + echo -n "$*" | sed -E "s/.* per-prod-op\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+k\/s).*/\1/" + echo -n -e ", avg mem: " + echo -n "$*" | sed -E "s/.* memory usage\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+MiB).*/\1/" + echo -n ", peak mem: " + echo "$*" | sed -E "s/.* peak memory usage\s+([0-9]+\.[0-9]+MiB).*/\1/" +} + +summarize_htab_mem() +{ + local bench="$1" + local summary=$(echo $2 | tail -n1) + + printf "%-20s %s\n" "$bench" "$(htab_mem $summary)" +} + +htab_mem_bench() +{ + local name + + for name in overwrite batch_add_batch_del add_del_on_diff_cpu + do + summarize_htab_mem "$name" "$($RUN_BENCH htab-mem --use-case $name -p8 "$@")" + done +} + +header "preallocated" +htab_mem_bench "--preallocated" + +header "normal bpf ma" +htab_mem_bench diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh new file mode 100755 index 000000000000..2eb2b513a173 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Hashmap Control" +for i in 10 1000 10000 100000 4194304; do +subtitle "num keys: $i" + summarize_local_storage "hashmap (control) sequential get: "\ + "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)" + printf "\n" +done + +header "Local Storage" +for i in 1 10 16 17 24 32 100 1000; do +subtitle "num_maps: $i" + summarize_local_storage "local_storage cache sequential get: "\ + "$(./bench --nr_maps $i local-storage-cache-seq-get)" + summarize_local_storage "local_storage cache interleaved get: "\ + "$(./bench --nr_maps $i 
local-storage-cache-int-get)" + printf "\n" +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh new file mode 100755 index 000000000000..3e8a969f2096 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +kthread_pid=`pgrep rcu_tasks_trace_kthread` + +if [ -z $kthread_pid ]; then + echo "error: Couldn't find rcu_tasks_trace_kthread" + exit 1 +fi + +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh index 16f774b1cdbe..7b281dbe4165 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh @@ -2,7 +2,7 @@ set -eufo pipefail -for i in base kprobe kretprobe rawtp fentry fexit fmodret +for i in base kprobe kretprobe rawtp fentry fexit do summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) printf "%-10s: %s\n" $i "$summary" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index af4aa04caba6..91e3567962ff 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -1,75 +1,51 @@ #!/bin/bash -set -eufo pipefail - -RUN_BENCH="sudo ./bench -w3 -d10 -a" - -function hits() -{ - echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} +source ./benchs/run_common.sh -function drops() -{ - echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" -} - -function header() -{ - local len=${#1} - - printf "\n%s\n" "$1" - for i in $(seq 1 $len); do printf '='; done - printf '\n' -} +set 
-eufo pipefail -function summarize() -{ - bench="$1" - summary=$(echo $2 | tail -n1) - printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" -} +RUN_RB_BENCH="$RUN_BENCH -c1" header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH $b)" + summarize $b "$($RUN_RB_BENCH $b)" done header "Single-producer, parallel producer, sampled notification" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-sampled $b)" + summarize $b "$($RUN_RB_BENCH --rb-sampled $b)" done header "Single-producer, back-to-back mode" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-b2b $b)" - summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)" + summarize $b "$($RUN_RB_BENCH --rb-b2b $b)" + summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)" done header "Ringbuf back-to-back, effect of sample rate" for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do - summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" + summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" done header "Perfbuf back-to-back, effect of sample rate" for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do - summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" + summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" done header "Ringbuf back-to-back, reserve+commit vs output" -summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)" -summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)" +summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)" +summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)" header "Ringbuf sampled, reserve+commit vs output" -summarize "reserve-sampled" 
"$($RUN_BENCH --rb-sampled rb-custom)" -summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)" +summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)" +summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)" header "Single-producer, consumer/producer competing on the same CPU, low batch count" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" + summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" done header "Ringbuf, multi-producer contention" for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do - summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" + summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh new file mode 100755 index 000000000000..142697284b45 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for s in 1 8 64 512 2048 4095; do + for b in no-helper helper; do + summarize ${b}-${s} "$($RUN_BENCH --cmp-str-len=$s strncmp-${b})" + done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh index 78e83f243294..a690f5a68b6b 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh @@ -2,8 +2,22 @@ set -eufo pipefail -for i in base tp rawtp kprobe fentry fmodret -do - summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) - printf "%-10s: %s\n" $i "$summary" +def_tests=( \ + usermode-count kernel-count 
syscall-count \ + fentry fexit fmodret \ + rawtp tp \ + kprobe kprobe-multi \ + kretprobe kretprobe-multi \ +) + +tests=("$@") +if [ ${#tests[@]} -eq 0 ]; then + tests=("${def_tests[@]}") +fi + +p=${PROD_CNT:-1} + +for t in "${tests[@]}"; do + summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-15s: %s\n" $t "$summary" done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh new file mode 100755 index 000000000000..af169f831f2f --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eufo pipefail + +for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret} +do + summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-15s: %s\n" $i "$summary" +done diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh new file mode 100644 index 000000000000..d9f40af82006 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function subtitle() +{ + local len=${#1} + printf "\t%s\n" "$1" +} + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function percentage() +{ + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" +} + +function ops() +{ + echo -n "throughput: " + echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", latency: " + echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" +} + +function 
local_storage() +{ + echo -n "hits throughput: " + echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", hits latency: " + echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" + echo -n ", important_hits throughput: " + echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" +} + +function total() +{ + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + +function summarize_percentage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" +} + +function summarize_ops() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(ops $summary)" +} + +function summarize_local_storage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(local_storage $summary)" +} + +function summarize_total() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(total $summary)" +} |