Diffstat (limited to 'tools/testing/selftests/bpf/map_tests')
9 files changed, 2473 insertions, 98 deletions
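The hunks below migrate map creation from the removed bpf_create_map_xattr() helper to libbpf's bpf_map_create(). For reference, a minimal sketch of the newer call pattern; the map name, key/value sizes and max_entries here are illustrative only and are not taken from the patch:

/* Sketch: create an LPM trie the way the updated selftests do.
 * Returns a map fd on success, a negative value on error.
 */
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int create_example_lpm_trie(void)
{
	/* LPM tries must be created with BPF_F_NO_PREALLOC */
	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);

	/* key: 4-byte prefixlen header followed by 4 bytes of data */
	return bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "example_trie",
			      8, sizeof(__u32), 100, &opts);
}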
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c index f0a64d8ac59a..b595556315bc 100644 --- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c @@ -3,16 +3,20 @@ #include <stdio.h> #include <errno.h> #include <string.h> +#include <unistd.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> #include <test_maps.h> +static int nr_cpus; + static void map_batch_update(int map_fd, __u32 max_entries, int *keys, - int *values) + __s64 *values, bool is_pcpu) { - int i, err; + int i, j, err; + int cpu_offset = 0; DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = 0, .flags = 0, @@ -20,22 +24,41 @@ static void map_batch_update(int map_fd, __u32 max_entries, int *keys, for (i = 0; i < max_entries; i++) { keys[i] = i; - values[i] = i + 1; + if (is_pcpu) { + cpu_offset = i * nr_cpus; + for (j = 0; j < nr_cpus; j++) + (values + cpu_offset)[j] = i + 1 + j; + } else { + values[i] = i + 1; + } } err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); } -static void map_batch_verify(int *visited, __u32 max_entries, - int *keys, int *values) +static void map_batch_verify(int *visited, __u32 max_entries, int *keys, + __s64 *values, bool is_pcpu) { - int i; + int i, j; + int cpu_offset = 0; memset(visited, 0, max_entries * sizeof(*visited)); for (i = 0; i < max_entries; i++) { - CHECK(keys[i] + 1 != values[i], "key/value checking", - "error: i %d key %d value %d\n", i, keys[i], values[i]); + if (is_pcpu) { + cpu_offset = i * nr_cpus; + for (j = 0; j < nr_cpus; j++) { + __s64 value = (values + cpu_offset)[j]; + CHECK(keys[i] + j + 1 != value, + "key/value checking", + "error: i %d j %d key %d value %lld\n", i, + j, keys[i], value); + } + } else { + CHECK(keys[i] + 1 != values[i], "key/value checking", + "error: i %d key %d value %lld\n", i, keys[i], + values[i]); + } visited[i] = 1; } for (i = 0; i < max_entries; i++) { @@ -44,59 +67,53 @@ static void map_batch_verify(int *visited, __u32 max_entries, } } -void test_array_map_batch_ops(void) +static void __test_map_lookup_and_update_batch(bool is_pcpu) { - struct bpf_create_map_attr xattr = { - .name = "array_map", - .map_type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - }; - int map_fd, *keys, *values, *visited; + int map_fd, *keys, *visited; __u32 count, total, total_success; const __u32 max_entries = 10; - bool nospace_err; __u64 batch = 0; - int err, step; + int err, step, value_size; + void *values; DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = 0, .flags = 0, ); - xattr.max_entries = max_entries; - map_fd = bpf_create_map_xattr(&xattr); + map_fd = bpf_map_create(is_pcpu ? 
BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY, + "array_map", sizeof(int), sizeof(__s64), max_entries, NULL); CHECK(map_fd == -1, - "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + "bpf_map_create()", "error:%s\n", strerror(errno)); + + value_size = sizeof(__s64); + if (is_pcpu) + value_size *= nr_cpus; - keys = malloc(max_entries * sizeof(int)); - values = malloc(max_entries * sizeof(int)); - visited = malloc(max_entries * sizeof(int)); + keys = calloc(max_entries, sizeof(*keys)); + values = calloc(max_entries, value_size); + visited = calloc(max_entries, sizeof(*visited)); CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", strerror(errno)); - /* populate elements to the map */ - map_batch_update(map_fd, max_entries, keys, values); - /* test 1: lookup in a loop with various steps. */ total_success = 0; for (step = 1; step < max_entries; step++) { - map_batch_update(map_fd, max_entries, keys, values); - map_batch_verify(visited, max_entries, keys, values); + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); memset(keys, 0, max_entries * sizeof(*keys)); - memset(values, 0, max_entries * sizeof(*values)); + memset(values, 0, max_entries * value_size); batch = 0; total = 0; /* iteratively lookup/delete elements with 'step' * elements each. */ count = step; - nospace_err = false; while (true) { err = bpf_map_lookup_batch(map_fd, - total ? &batch : NULL, &batch, - keys + total, - values + total, - &count, &opts); + total ? &batch : NULL, + &batch, keys + total, + values + total * value_size, + &count, &opts); CHECK((err && errno != ENOENT), "lookup with steps", "error: %s\n", strerror(errno)); @@ -107,13 +124,10 @@ void test_array_map_batch_ops(void) } - if (nospace_err == true) - continue; - CHECK(total != max_entries, "lookup with steps", "total = %u, max_entries = %u\n", total, max_entries); - map_batch_verify(visited, max_entries, keys, values); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); total_success++; } @@ -121,9 +135,31 @@ void test_array_map_batch_ops(void) CHECK(total_success == 0, "check total_success", "unexpected failure\n"); - printf("%s:PASS\n", __func__); - free(keys); free(values); free(visited); + close(map_fd); +} + +static void array_map_batch_ops(void) +{ + __test_map_lookup_and_update_batch(false); + printf("test_%s:PASS\n", __func__); +} + +static void array_percpu_map_batch_ops(void) +{ + __test_map_lookup_and_update_batch(true); + printf("test_%s:PASS\n", __func__); +} + +void test_array_map_batch_ops(void) +{ + nr_cpus = libbpf_num_possible_cpus(); + + CHECK(nr_cpus < 0, "nr_cpus checking", + "error: get possible cpus failed"); + + array_map_batch_ops(); + array_percpu_map_batch_ops(); } diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c index 976bf415fbdd..5da493b94ae2 100644 --- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <errno.h> #include <string.h> +#include <unistd.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -83,22 +84,15 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu) int err, step, value_size; bool nospace_err; void *values; - struct bpf_create_map_attr xattr = { - .name = "hash_map", - .map_type = is_pcpu ? 
BPF_MAP_TYPE_PERCPU_HASH : - BPF_MAP_TYPE_HASH, - .key_size = sizeof(int), - .value_size = sizeof(int), - }; DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = 0, .flags = 0, ); - xattr.max_entries = max_entries; - map_fd = bpf_create_map_xattr(&xattr); + map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : BPF_MAP_TYPE_HASH, + "hash_map", sizeof(int), sizeof(int), max_entries, NULL); CHECK(map_fd == -1, - "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + "bpf_map_create()", "error:%s\n", strerror(errno)); value_size = is_pcpu ? sizeof(value) : sizeof(int); keys = malloc(max_entries * sizeof(int)); @@ -203,7 +197,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu) CHECK(total != max_entries, "delete with steps", "total = %u, max_entries = %u\n", total, max_entries); - /* check map is empty, errono == ENOENT */ + /* check map is empty, errno == ENOENT */ err = bpf_map_get_next_key(map_fd, NULL, &key); CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()", "error: %s\n", strerror(errno)); @@ -262,6 +256,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu) free(visited); if (!is_pcpu) free(values); + close(map_fd); } void htab_map_batch_ops(void) diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c new file mode 100644 index 000000000000..d32e4edac930 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c @@ -0,0 +1,1188 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Randomized tests for eBPF longest-prefix-match maps + * + * This program runs randomized tests against the lpm-bpf-map. It implements a + * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked + * lists. The implementation should be pretty straightforward. + * + * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies + * the trie-based bpf-map implementation behaves the same way as tlpm. 
+ */ + +#include <assert.h> +#include <errno.h> +#include <inttypes.h> +#include <linux/bpf.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <endian.h> +#include <arpa/inet.h> +#include <sys/time.h> + +#include <bpf/bpf.h> +#include <test_maps.h> + +#include "bpf_util.h" + +struct tlpm_node { + struct tlpm_node *next; + size_t n_bits; + uint8_t key[]; +}; + +struct lpm_trie_bytes_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned char data[8]; +}; + +struct lpm_trie_int_key { + union { + struct bpf_lpm_trie_key_hdr hdr; + __u32 prefixlen; + }; + unsigned int data; +}; + +static struct tlpm_node *tlpm_match(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits); + +static struct tlpm_node *tlpm_add(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits) +{ + struct tlpm_node *node; + size_t n; + + n = (n_bits + 7) / 8; + + /* 'overwrite' an equivalent entry if one already exists */ + node = tlpm_match(list, key, n_bits); + if (node && node->n_bits == n_bits) { + memcpy(node->key, key, n); + return list; + } + + /* add new entry with @key/@n_bits to @list and return new head */ + + node = malloc(sizeof(*node) + n); + assert(node); + + node->next = list; + node->n_bits = n_bits; + memcpy(node->key, key, n); + + return node; +} + +static void tlpm_clear(struct tlpm_node *list) +{ + struct tlpm_node *node; + + /* free all entries in @list */ + + while ((node = list)) { + list = list->next; + free(node); + } +} + +static struct tlpm_node *tlpm_match(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits) +{ + struct tlpm_node *best = NULL; + size_t i; + + /* Perform longest prefix-match on @key/@n_bits. That is, iterate all + * entries and match each prefix against @key. Remember the "best" + * entry we find (i.e., the longest prefix that matches) and return it + * to the caller when done. 
+ */ + + for ( ; list; list = list->next) { + for (i = 0; i < n_bits && i < list->n_bits; ++i) { + if ((key[i / 8] & (1 << (7 - i % 8))) != + (list->key[i / 8] & (1 << (7 - i % 8)))) + break; + } + + if (i >= list->n_bits) { + if (!best || i > best->n_bits) + best = list; + } + } + + return best; +} + +static struct tlpm_node *tlpm_delete(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits) +{ + struct tlpm_node *best = tlpm_match(list, key, n_bits); + struct tlpm_node *node; + + if (!best || best->n_bits != n_bits) + return list; + + if (best == list) { + node = best->next; + free(best); + return node; + } + + for (node = list; node; node = node->next) { + if (node->next == best) { + node->next = best->next; + free(best); + return list; + } + } + /* should never get here */ + assert(0); + return list; +} + +static void test_lpm_basic(void) +{ + struct tlpm_node *list = NULL, *t1, *t2; + + /* very basic, static tests to verify tlpm works as expected */ + + assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + + t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16)); + assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8)); + assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8)); + assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7)); + + t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15)); + assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16)); + + list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); + + list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8); + assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + + tlpm_clear(list); +} + +static void test_lpm_order(void) +{ + struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL; + size_t i, j; + + /* Verify the tlpm implementation works correctly regardless of the + * order of entries. Insert a random set of entries into @l1, and copy + * the same data in reverse order into @l2. Then verify a lookup of + * random keys will yield the same result in both sets. + */ + + for (i = 0; i < (1 << 12); ++i) + l1 = tlpm_add(l1, (uint8_t[]){ + rand() % 0xff, + rand() % 0xff, + }, rand() % 16 + 1); + + for (t1 = l1; t1; t1 = t1->next) + l2 = tlpm_add(l2, t1->key, t1->n_bits); + + for (i = 0; i < (1 << 8); ++i) { + uint8_t key[] = { rand() % 0xff, rand() % 0xff }; + + t1 = tlpm_match(l1, key, 16); + t2 = tlpm_match(l2, key, 16); + + assert(!t1 == !t2); + if (t1) { + assert(t1->n_bits == t2->n_bits); + for (j = 0; j < t1->n_bits; ++j) + assert((t1->key[j / 8] & (1 << (7 - j % 8))) == + (t2->key[j / 8] & (1 << (7 - j % 8)))); + } + } + + tlpm_clear(l1); + tlpm_clear(l2); +} + +static void test_lpm_map(int keysize) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + volatile size_t n_matches, n_matches_after_delete; + size_t i, j, n_nodes, n_lookups; + struct tlpm_node *t, *list = NULL; + struct bpf_lpm_trie_key_u8 *key; + uint8_t *data, *value; + int r, map; + + /* Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of + * prefixes and insert it into both tlpm and bpf-lpm. 
Then run some + * randomized lookups and verify both maps return the same result. + */ + + n_matches = 0; + n_matches_after_delete = 0; + n_nodes = 1 << 8; + n_lookups = 1 << 9; + + data = alloca(keysize); + memset(data, 0, keysize); + + value = alloca(keysize + 1); + memset(value, 0, keysize + 1); + + key = alloca(sizeof(*key) + keysize); + memset(key, 0, sizeof(*key) + keysize); + + map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + sizeof(*key) + keysize, + keysize + 1, + 4096, + &opts); + assert(map >= 0); + + for (i = 0; i < n_nodes; ++i) { + for (j = 0; j < keysize; ++j) + value[j] = rand() & 0xff; + value[keysize] = rand() % (8 * keysize + 1); + + list = tlpm_add(list, value, value[keysize]); + + key->prefixlen = value[keysize]; + memcpy(key->data, value, keysize); + r = bpf_map_update_elem(map, key, value, 0); + assert(!r); + } + + for (i = 0; i < n_lookups; ++i) { + for (j = 0; j < keysize; ++j) + data[j] = rand() & 0xff; + + t = tlpm_match(list, data, 8 * keysize); + + key->prefixlen = 8 * keysize; + memcpy(key->data, data, keysize); + r = bpf_map_lookup_elem(map, key, value); + assert(!r || errno == ENOENT); + assert(!t == !!r); + + if (t) { + ++n_matches; + assert(t->n_bits == value[keysize]); + for (j = 0; j < t->n_bits; ++j) + assert((t->key[j / 8] & (1 << (7 - j % 8))) == + (value[j / 8] & (1 << (7 - j % 8)))); + } + } + + /* Remove the first half of the elements in the tlpm and the + * corresponding nodes from the bpf-lpm. Then run the same + * large number of random lookups in both and make sure they match. + * Note: we need to count the number of nodes actually inserted + * since there may have been duplicates. + */ + for (i = 0, t = list; t; i++, t = t->next) + ; + for (j = 0; j < i / 2; ++j) { + key->prefixlen = list->n_bits; + memcpy(key->data, list->key, keysize); + r = bpf_map_delete_elem(map, key); + assert(!r); + list = tlpm_delete(list, list->key, list->n_bits); + assert(list); + } + for (i = 0; i < n_lookups; ++i) { + for (j = 0; j < keysize; ++j) + data[j] = rand() & 0xff; + + t = tlpm_match(list, data, 8 * keysize); + + key->prefixlen = 8 * keysize; + memcpy(key->data, data, keysize); + r = bpf_map_lookup_elem(map, key, value); + assert(!r || errno == ENOENT); + assert(!t == !!r); + + if (t) { + ++n_matches_after_delete; + assert(t->n_bits == value[keysize]); + for (j = 0; j < t->n_bits; ++j) + assert((t->key[j / 8] & (1 << (7 - j % 8))) == + (value[j / 8] & (1 << (7 - j % 8)))); + } + } + + close(map); + tlpm_clear(list); + + /* With 255 random nodes in the map, we are pretty likely to match + * something on every lookup. 
For statistics, use this: + * + * printf(" nodes: %zu\n" + * " lookups: %zu\n" + * " matches: %zu\n" + * "matches(delete): %zu\n", + * n_nodes, n_lookups, n_matches, n_matches_after_delete); + */ +} + +/* Test the implementation with some 'real world' examples */ + +static void test_lpm_ipaddr(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct bpf_lpm_trie_key_u8 *key_ipv4; + struct bpf_lpm_trie_key_u8 *key_ipv6; + size_t key_size_ipv4; + size_t key_size_ipv6; + int map_fd_ipv4; + int map_fd_ipv6; + __u64 value; + + key_size_ipv4 = sizeof(*key_ipv4) + sizeof(__u32); + key_size_ipv6 = sizeof(*key_ipv6) + sizeof(__u32) * 4; + key_ipv4 = alloca(key_size_ipv4); + key_ipv6 = alloca(key_size_ipv6); + + map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + key_size_ipv4, sizeof(value), + 100, &opts); + assert(map_fd_ipv4 >= 0); + + map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + key_size_ipv6, sizeof(value), + 100, &opts); + assert(map_fd_ipv6 >= 0); + + /* Fill data some IPv4 and IPv6 address ranges */ + value = 1; + key_ipv4->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key_ipv4->data); + assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0); + + value = 2; + key_ipv4->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key_ipv4->data); + assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0); + + value = 3; + key_ipv4->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key_ipv4->data); + assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0); + + value = 5; + key_ipv4->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key_ipv4->data); + assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0); + + value = 4; + key_ipv4->prefixlen = 23; + inet_pton(AF_INET, "192.168.0.0", key_ipv4->data); + assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0); + + value = 0xdeadbeef; + key_ipv6->prefixlen = 64; + inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data); + assert(bpf_map_update_elem(map_fd_ipv6, key_ipv6, &value, 0) == 0); + + /* Set tprefixlen to maximum for lookups */ + key_ipv4->prefixlen = 32; + key_ipv6->prefixlen = 128; + + /* Test some lookups that should come back with a value */ + inet_pton(AF_INET, "192.168.128.23", key_ipv4->data); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0); + assert(value == 3); + + inet_pton(AF_INET, "192.168.0.1", key_ipv4->data); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0); + assert(value == 2); + + inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data); + assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0); + assert(value == 0xdeadbeef); + + inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data); + assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0); + assert(value == 0xdeadbeef); + + /* Test some lookups that should not match any entry */ + inet_pton(AF_INET, "10.0.0.1", key_ipv4->data); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT); + + inet_pton(AF_INET, "11.11.11.11", key_ipv4->data); + assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT); + + inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data); + assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT); + + close(map_fd_ipv4); + close(map_fd_ipv6); +} + +static void test_lpm_delete(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct bpf_lpm_trie_key_u8 *key; + size_t key_size; + int map_fd; + __u64 
value; + + key_size = sizeof(*key) + sizeof(__u32); + key = alloca(key_size); + + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, + key_size, sizeof(value), + 100, &opts); + assert(map_fd >= 0); + + /* Add nodes: + * 192.168.0.0/16 (1) + * 192.168.0.0/24 (2) + * 192.168.128.0/24 (3) + * 192.168.1.0/24 (4) + * + * (1) + * / \ + * (IM) (3) + * / \ + * (2) (4) + */ + value = 1; + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 2; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 3; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 4; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + /* remove non-existent node */ + key->prefixlen = 32; + inet_pton(AF_INET, "10.0.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT); + + key->prefixlen = 30; // unused prefix so far + inet_pton(AF_INET, "192.255.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == -ENOENT); + + key->prefixlen = 16; // same prefix as the root node + inet_pton(AF_INET, "192.255.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == -ENOENT); + + /* assert initial lookup */ + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 2); + + /* remove leaf node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove leaf (and intermediary) node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.1.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove root node */ + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 3); + + /* remove last node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT); + + close(map_fd); +} + +static void test_lpm_get_next_key(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct bpf_lpm_trie_key_u8 *key_p, *next_key_p; + size_t key_size; + __u32 value = 0; + int map_fd; + + key_size = sizeof(*key_p) + sizeof(__u32); + key_p = alloca(key_size); + next_key_p = alloca(key_size); + + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), 100, &opts); + assert(map_fd >= 0); + + /* empty tree. get_next_key should return ENOENT */ + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT); + + /* get and verify the first key, get the second one should fail. 
*/ + key_p->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); + + /* no exact matching key should get the first one in post order. */ + key_p->prefixlen = 8; + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 16 && key_p->data[0] == 192 && + key_p->data[1] == 168); + + /* add one more element (total two) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 128); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); + + /* Add one more element (total three) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); + + /* Add one more element (total four) */ + key_p->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); + + /* Add one more element (total five) */ + key_p->prefixlen = 28; + inet_pton(AF_INET, "192.168.1.128", key_p->data); + 
assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1 && + next_key_p->data[3] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT); + + /* no exact matching key should return the first one in post order */ + key_p->prefixlen = 22; + inet_pton(AF_INET, "192.168.1.0", key_p->data); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 0); + + close(map_fd); +} + +#define MAX_TEST_KEYS 4 +struct lpm_mt_test_info { + int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */ + int iter; + int map_fd; + struct { + __u32 prefixlen; + __u32 data; + } key[MAX_TEST_KEYS]; +}; + +static void *lpm_test_command(void *arg) +{ + int i, j, ret, iter, key_size; + struct lpm_mt_test_info *info = arg; + struct bpf_lpm_trie_key_u8 *key_p; + + key_size = sizeof(*key_p) + sizeof(__u32); + key_p = alloca(key_size); + for (iter = 0; iter < info->iter; iter++) + for (i = 0; i < MAX_TEST_KEYS; i++) { + /* first half of iterations in forward order, + * and second half in backward order. + */ + j = (iter < (info->iter / 2)) ? 
i : MAX_TEST_KEYS - i - 1; + key_p->prefixlen = info->key[j].prefixlen; + memcpy(key_p->data, &info->key[j].data, sizeof(__u32)); + if (info->cmd == 0) { + __u32 value = j; + /* update must succeed */ + assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0); + } else if (info->cmd == 1) { + ret = bpf_map_delete_elem(info->map_fd, key_p); + assert(ret == 0 || errno == ENOENT); + } else if (info->cmd == 2) { + __u32 value; + ret = bpf_map_lookup_elem(info->map_fd, key_p, &value); + assert(ret == 0 || errno == ENOENT); + } else { + struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size); + ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p); + assert(ret == 0 || errno == ENOENT || errno == ENOMEM); + } + } + + // Pass successful exit info back to the main thread + pthread_exit((void *)info); +} + +static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) +{ + info->iter = 2000; + info->map_fd = map_fd; + info->key[0].prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", &info->key[0].data); + info->key[1].prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", &info->key[1].data); + info->key[2].prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", &info->key[2].data); + info->key[3].prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", &info->key[3].data); +} + +static void test_lpm_multi_thread(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); + struct lpm_mt_test_info info[4]; + size_t key_size, value_size; + pthread_t thread_id[4]; + int i, map_fd; + void *ret; + + /* create a trie */ + value_size = sizeof(__u32); + key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size; + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts); + + /* create 4 threads to test update, delete, lookup and get_next_key */ + setup_lpm_mt_test_info(&info[0], map_fd); + for (i = 0; i < 4; i++) { + if (i != 0) + memcpy(&info[i], &info[0], sizeof(info[i])); + info[i].cmd = i; + assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0); + } + + for (i = 0; i < 4; i++) + assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]); + + close(map_fd); +} + +static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts); + int fd; + + opts.map_flags = BPF_F_NO_PREALLOC; + fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries, + &opts); + CHECK(fd < 0, "bpf_map_create", "error %d\n", errno); + + return fd; +} + +static void test_lpm_trie_update_flags(void) +{ + struct lpm_trie_int_key key; + unsigned int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* invalid flags (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST); + CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err); + + /* overwrite an empty qp-trie (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + 
CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add the same node as new node (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "add new elem again", "error %d\n", err); + + /* overwrite the existed node */ + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the node */ + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "update elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite a non-existent node which is the prefix of the first + * node (Error). + */ + key.prefixlen = 8; + key.data = 0; + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + /* add a new node which is the prefix of the first node */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add another new node which will be the sibling of the first node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* overwrite the third node */ + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup key", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* delete the second node to make it an intermediate node */ + key.prefixlen = 8; + key.data = 0; + err = bpf_map_delete_elem(fd, &key); + CHECK(err, "del elem", "error %d\n", err); + + /* overwrite the intermediate node (Error) */ + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err); + + close(fd); +} + +static void test_lpm_trie_update_full_map(void) +{ + struct lpm_trie_int_key key; + int value, got; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), 3); + + /* add a new node */ + key.prefixlen = 16; + key.data = 0; + value = 0; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 8; + key.data = 0; + value = 1; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = 
bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* add new node */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 2; + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add new elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* try to add more node (Error) */ + key.prefixlen = 32; + key.data = 0; + value = 3; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err); + + /* update the value of an existed node with BPF_EXIST */ + key.prefixlen = 16; + key.data = 0; + value = 4; + err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + /* update the value of an existed node with BPF_ANY */ + key.prefixlen = 9; + key.data = htobe32(1 << 23); + value = 5; + err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); + CHECK(err, "overwrite elem", "error %d\n", err); + got = 0; + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "error %d\n", err); + CHECK(got != value, "check value", "got %d exp %d\n", got, value); + + close(fd); +} + +static int cmp_str(const void *a, const void *b) +{ + const char *str_a = *(const char **)a, *str_b = *(const char **)b; + + return strcmp(str_a, str_b); +} + +/* Save strings in LPM trie. The trailing '\0' for each string will be + * accounted in the prefixlen. The strings returned during the iteration + * should be sorted as expected. + */ +static void test_lpm_trie_iterate_strs(void) +{ + static const char * const keys[] = { + "ab", "abO", "abc", "abo", "abS", "abcd", + }; + const char *sorted_keys[ARRAY_SIZE(keys)]; + struct lpm_trie_bytes_key key, next_key; + unsigned int value, got, i, j, len; + struct lpm_trie_bytes_key *cur; + int fd, err; + + fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys)); + + for (i = 0; i < ARRAY_SIZE(keys); i++) { + unsigned int flags; + + /* add i-th element */ + flags = i % 2 ? BPF_NOEXIST : 0; + len = strlen(keys[i]); + /* include the trailing '\0' */ + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[i], len); + value = i + 100; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "add elem", "#%u error %d\n", i, err); + + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u error %d\n", i, err); + CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got); + + /* re-add i-th element (Error) */ + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err); + + /* Overwrite i-th element */ + flags = i % 2 ? 
0 : BPF_EXIST; + value = i; + err = bpf_map_update_elem(fd, &key, &value, flags); + CHECK(err, "update elem", "error %d\n", err); + + /* Lookup #[0~i] elements */ + for (j = 0; j <= i; j++) { + len = strlen(keys[j]); + key.prefixlen = (len + 1) * 8; + memset(key.data, 0, sizeof(key.data)); + memcpy(key.data, keys[j], len); + err = bpf_map_lookup_elem(fd, &key, &got); + CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err); + CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n", + i, j, value, got); + } + } + + /* Add element to a full qp-trie (Error) */ + key.prefixlen = sizeof(key.data) * 8; + memset(key.data, 0, sizeof(key.data)); + value = 0; + err = bpf_map_update_elem(fd, &key, &value, 0); + CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err); + + /* Iterate sorted elements: no deletion */ + memcpy(sorted_keys, keys, sizeof(keys)); + qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str); + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + /* Iterate sorted elements: delete the found key after each iteration */ + cur = NULL; + for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) { + len = strlen(sorted_keys[i]); + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != (len + 1) * 8, "iterate", + "#%u invalid len %u expect %u\n", + i, next_key.prefixlen, (len + 1) * 8); + CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate", + "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]); + + cur = &next_key; + + err = bpf_map_delete_elem(fd, cur); + CHECK(err, "delete", "#%u error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err); + + close(fd); +} + +/* Use the fixed prefixlen (32) and save integers in LPM trie. The iteration of + * LPM trie will return these integers in big-endian order, therefore, convert + * these integers to big-endian before update. After each iteration, delete the + * found key (the smallest integer) and expect the next iteration will return + * the second smallest number. 
+ */ +static void test_lpm_trie_iterate_ints(void) +{ + struct lpm_trie_int_key key, next_key; + unsigned int i, max_entries; + struct lpm_trie_int_key *cur; + unsigned int *data_set; + int fd, err; + bool value; + + max_entries = 4096; + data_set = calloc(max_entries, sizeof(*data_set)); + CHECK(!data_set, "malloc", "no mem\n"); + for (i = 0; i < max_entries; i++) + data_set[i] = i; + + fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries); + value = true; + for (i = 0; i < max_entries; i++) { + key.prefixlen = 32; + key.data = htobe32(data_set[i]); + + err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(err, "add elem", "#%u error %d\n", i, err); + } + + cur = NULL; + for (i = 0; i < max_entries; i++) { + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err, "iterate", "#%u error %d\n", i, err); + CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n", + i, next_key.prefixlen); + CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n", + i, be32toh(next_key.data), data_set[i]); + cur = &next_key; + + /* + * Delete the minimal key, the next call of bpf_get_next_key() + * will return the second minimal key. + */ + err = bpf_map_delete_elem(fd, &next_key); + CHECK(err, "del elem", "#%u elem error %d\n", i, err); + } + err = bpf_map_get_next_key(fd, cur, &next_key); + CHECK(err != -ENOENT, "more element", "error %d\n", err); + + err = bpf_map_get_next_key(fd, NULL, &next_key); + CHECK(err != -ENOENT, "no-empty qp-trie", "error %d\n", err); + + free(data_set); + + close(fd); +} + +void test_lpm_trie_map_basic_ops(void) +{ + int i; + + /* we want predictable, pseudo random tests */ + srand(0xf00ba1); + + test_lpm_basic(); + test_lpm_order(); + + /* Test with 8, 16, 24, 32, ... 128 bit prefix length */ + for (i = 1; i <= 16; ++i) + test_lpm_map(i); + + test_lpm_ipaddr(); + test_lpm_delete(); + test_lpm_get_next_key(); + test_lpm_multi_thread(); + + test_lpm_trie_update_flags(); + test_lpm_trie_update_full_map(); + test_lpm_trie_iterate_strs(); + test_lpm_trie_iterate_ints(); + + printf("%s: PASS\n", __func__); +} diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c new file mode 100644 index 000000000000..fe3e19f96244 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <arpa/inet.h> +#include <linux/bpf.h> +#include <netinet/in.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_maps.h> + +struct test_lpm_key { + __u32 prefix; + struct in_addr ipv4; +}; + +static void map_batch_update(int map_fd, __u32 max_entries, + struct test_lpm_key *keys, int *values) +{ + __u32 i; + int err; + char buff[16] = { 0 }; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + for (i = 0; i < max_entries; i++) { + keys[i].prefix = 32; + snprintf(buff, 16, "192.168.1.%d", i + 1); + inet_pton(AF_INET, buff, &keys[i].ipv4); + values[i] = i + 1; + } + + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); +} + +static void map_batch_verify(int *visited, __u32 max_entries, + struct test_lpm_key *keys, int *values) +{ + char buff[16] = { 0 }; + int lower_byte = 0; + __u32 i; + + memset(visited, 0, max_entries * sizeof(*visited)); 
+ for (i = 0; i < max_entries; i++) { + inet_ntop(AF_INET, &keys[i].ipv4, buff, 32); + CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF, + "sscanf()", "error: i %d\n", i); + CHECK(lower_byte != values[i], "key/value checking", + "error: i %d key %s value %d\n", i, buff, values[i]); + visited[i] = 1; + } + for (i = 0; i < max_entries; i++) { + CHECK(visited[i] != 1, "visited checking", + "error: keys array at index %d missing\n", i); + } +} + +void test_lpm_trie_map_batch_ops(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, create_opts, .map_flags = BPF_F_NO_PREALLOC); + struct test_lpm_key *keys, key; + int map_fd, *values, *visited; + __u32 step, count, total, total_success; + const __u32 max_entries = 10; + __u64 batch = 0; + int err; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map", + sizeof(struct test_lpm_key), sizeof(int), + max_entries, &create_opts); + CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n", + strerror(errno)); + + keys = malloc(max_entries * sizeof(struct test_lpm_key)); + values = malloc(max_entries * sizeof(int)); + visited = malloc(max_entries * sizeof(int)); + CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", + strerror(errno)); + + total_success = 0; + for (step = 1; step < max_entries; step++) { + map_batch_update(map_fd, max_entries, keys, values); + map_batch_verify(visited, max_entries, keys, values); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * sizeof(*values)); + batch = 0; + total = 0; + /* iteratively lookup/delete elements with 'step' + * elements each. + */ + count = step; + while (true) { + err = bpf_map_lookup_batch(map_fd, + total ? &batch : NULL, &batch, + keys + total, values + total, &count, &opts); + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + } + + CHECK(total != max_entries, "lookup with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + map_batch_verify(visited, max_entries, keys, values); + + total = 0; + count = step; + while (total < max_entries) { + if (max_entries - total < step) + count = max_entries - total; + err = bpf_map_delete_batch(map_fd, keys + total, &count, + &opts); + CHECK((err && errno != ENOENT), "delete batch", + "error: %s\n", strerror(errno)); + total += count; + if (err) + break; + } + CHECK(total != max_entries, "delete with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + /* check map is empty, errno == ENOENT */ + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()", + "error: %s\n", strerror(errno)); + + total_success++; + } + + CHECK(total_success == 0, "check total_success", + "unexpected failure\n"); + + printf("%s:PASS\n", __func__); + + free(keys); + free(values); + free(visited); + close(map_fd); +} diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c new file mode 100644 index 000000000000..0ba015686492 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <linux/bpf.h> +#include <stdio.h> +#include <stdbool.h> +#include <unistd.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + 
+#include <test_maps.h> + +struct test_lpm_key { + __u32 prefix; + __u32 data; +}; + +struct get_next_key_ctx { + struct test_lpm_key key; + bool start; + bool stop; + int map_fd; + int loop; +}; + +static void *get_next_key_fn(void *arg) +{ + struct get_next_key_ctx *ctx = arg; + struct test_lpm_key next_key; + int i = 0; + + while (!ctx->start) + usleep(1); + + while (!ctx->stop && i++ < ctx->loop) + bpf_map_get_next_key(ctx->map_fd, &ctx->key, &next_key); + + return NULL; +} + +static void abort_get_next_key(struct get_next_key_ctx *ctx, pthread_t *tids, + unsigned int nr) +{ + unsigned int i; + + ctx->stop = true; + ctx->start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +/* This test aims to prevent regression of future. As long as the kernel does + * not panic, it is considered as success. + */ +void test_lpm_trie_map_get_next_key(void) +{ +#define MAX_NR_THREADS 8 + LIBBPF_OPTS(bpf_map_create_opts, create_opts, + .map_flags = BPF_F_NO_PREALLOC); + struct test_lpm_key key = {}; + __u32 val = 0; + int map_fd; + const __u32 max_prefixlen = 8 * (sizeof(key) - sizeof(key.prefix)); + const __u32 max_entries = max_prefixlen + 1; + unsigned int i, nr = MAX_NR_THREADS, loop = 65536; + pthread_t tids[MAX_NR_THREADS]; + struct get_next_key_ctx ctx; + int err; + + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map", + sizeof(struct test_lpm_key), sizeof(__u32), + max_entries, &create_opts); + CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n", + strerror(errno)); + + for (i = 0; i <= max_prefixlen; i++) { + key.prefix = i; + err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); + CHECK(err, "bpf_map_update_elem()", "error:%s\n", + strerror(errno)); + } + + ctx.start = false; + ctx.stop = false; + ctx.map_fd = map_fd; + ctx.loop = loop; + memcpy(&ctx.key, &key, sizeof(key)); + + for (i = 0; i < nr; i++) { + err = pthread_create(&tids[i], NULL, get_next_key_fn, &ctx); + if (err) { + abort_get_next_key(&ctx, tids, i); + CHECK(err, "pthread_create", "error %d\n", err); + } + } + + ctx.start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); + + printf("%s:PASS\n", __func__); + + close(map_fd); +} diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c new file mode 100644 index 000000000000..79c3ccadb962 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -0,0 +1,278 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_maps.h> + +#define OUTER_MAP_ENTRIES 10 + +static __u32 get_map_id_from_fd(int map_fd) +{ + struct bpf_map_info map_info = {}; + uint32_t info_len = sizeof(map_info); + int ret; + + ret = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); + CHECK(ret < 0, "Finding map info failed", "error:%s\n", + strerror(errno)); + + return map_info.id; +} + +/* This creates number of OUTER_MAP_ENTRIES maps that will be stored + * in outer map and return the created map_fds + */ +static void create_inner_maps(enum bpf_map_type map_type, + __u32 *inner_map_fds) +{ + int map_fd, map_index, ret; + __u32 map_key = 0, map_id; + char map_name[16]; + + for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) { + memset(map_name, 0, sizeof(map_name)); + snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index); + map_fd = bpf_map_create(map_type, map_name, sizeof(__u32), + sizeof(__u32), 
1, NULL); + CHECK(map_fd < 0, + "inner bpf_map_create() failed", + "map_type=(%d) map_name(%s), error:%s\n", + map_type, map_name, strerror(errno)); + + /* keep track of the inner map fd as it is required + * to add records in outer map + */ + inner_map_fds[map_index] = map_fd; + + /* Add entry into this created map + * eg: map1 key = 0, value = map1's map id + * map2 key = 0, value = map2's map id + */ + map_id = get_map_id_from_fd(map_fd); + ret = bpf_map_update_elem(map_fd, &map_key, &map_id, 0); + CHECK(ret != 0, + "bpf_map_update_elem failed", + "map_type=(%d) map_name(%s), error:%s\n", + map_type, map_name, strerror(errno)); + } +} + +static int create_outer_map(enum bpf_map_type map_type, __u32 inner_map_fd) +{ + int outer_map_fd; + LIBBPF_OPTS(bpf_map_create_opts, attr); + + attr.inner_map_fd = inner_map_fd; + outer_map_fd = bpf_map_create(map_type, "outer_map", sizeof(__u32), + sizeof(__u32), OUTER_MAP_ENTRIES, + &attr); + CHECK(outer_map_fd < 0, + "outer bpf_map_create()", + "map_type=(%d), error:%s\n", + map_type, strerror(errno)); + + return outer_map_fd; +} + +static void validate_fetch_results(int outer_map_fd, + __u32 *fetched_keys, __u32 *fetched_values, + __u32 max_entries_fetched) +{ + __u32 inner_map_key, inner_map_value; + int inner_map_fd, entry, err; + __u32 outer_map_value; + + for (entry = 0; entry < max_entries_fetched; ++entry) { + outer_map_value = fetched_values[entry]; + inner_map_fd = bpf_map_get_fd_by_id(outer_map_value); + CHECK(inner_map_fd < 0, + "Failed to get inner map fd", + "from id(%d), error=%s\n", + outer_map_value, strerror(errno)); + err = bpf_map_get_next_key(inner_map_fd, NULL, &inner_map_key); + CHECK(err != 0, + "Failed to get inner map key", + "error=%s\n", strerror(errno)); + + err = bpf_map_lookup_elem(inner_map_fd, &inner_map_key, + &inner_map_value); + + close(inner_map_fd); + + CHECK(err != 0, + "Failed to get inner map value", + "for key(%d), error=%s\n", + inner_map_key, strerror(errno)); + + /* Actual value validation */ + CHECK(outer_map_value != inner_map_value, + "Failed to validate inner map value", + "fetched(%d) and lookedup(%d)!\n", + outer_map_value, inner_map_value); + } +} + +static void fetch_and_validate(int outer_map_fd, + struct bpf_map_batch_opts *opts, + __u32 batch_size, bool delete_entries, + bool has_holes) +{ + int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes; + __u32 *fetched_keys, *fetched_values, total_fetched = 0, i; + __u32 batch_key = 0, fetch_count, step_size = batch_size; + __u32 value_size = sizeof(__u32); + + /* Total entries needs to be fetched */ + fetched_keys = calloc(max_entries, value_size); + fetched_values = calloc(max_entries, value_size); + CHECK((!fetched_keys || !fetched_values), + "Memory allocation failed for fetched_keys or fetched_values", + "error=%s\n", strerror(errno)); + + /* hash map may not always return full batch */ + for (i = 0; i < OUTER_MAP_ENTRIES; i++) { + fetch_count = step_size; + err = delete_entries + ? bpf_map_lookup_and_delete_batch(outer_map_fd, + total_fetched ? &batch_key : NULL, + &batch_key, + fetched_keys + total_fetched, + fetched_values + total_fetched, + &fetch_count, opts) + : bpf_map_lookup_batch(outer_map_fd, + total_fetched ? 
&batch_key : NULL, + &batch_key, + fetched_keys + total_fetched, + fetched_values + total_fetched, + &fetch_count, opts); + + if (err && errno == ENOSPC) { + /* Fetch again with higher batch size */ + total_fetched = 0; + step_size += batch_size; + continue; + } + + CHECK((err < 0 && (errno != ENOENT)), + "lookup with steps failed", + "error: %s\n", strerror(errno)); + + /* Update the total fetched number */ + total_fetched += fetch_count; + if (err) + break; + } + + CHECK((total_fetched != max_entries), + "Unable to fetch expected entries !", + "total_fetched(%d) and max_entries(%d) error: (%d):%s\n", + total_fetched, max_entries, errno, strerror(errno)); + + /* validate the fetched entries */ + validate_fetch_results(outer_map_fd, fetched_keys, + fetched_values, total_fetched); + printf("batch_op(%s) is successful with batch_size(%d)\n", + delete_entries ? "LOOKUP_AND_DELETE" : "LOOKUP", batch_size); + + free(fetched_keys); + free(fetched_values); +} + +static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, + enum bpf_map_type inner_map_type, + bool has_holes) +{ + __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes; + __u32 *outer_map_keys, *inner_map_fds; + LIBBPF_OPTS(bpf_map_batch_opts, opts); + __u32 value_size = sizeof(__u32); + int batch_size[2] = {5, 10}; + __u32 map_index, op_index; + int outer_map_fd, ret; + + outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size); + inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size); + CHECK((!outer_map_keys || !inner_map_fds), + "Memory allocation failed for outer_map_keys or inner_map_fds", + "error=%s\n", strerror(errno)); + + create_inner_maps(inner_map_type, inner_map_fds); + + outer_map_fd = create_outer_map(outer_map_type, *inner_map_fds); + /* create outer map keys */ + for (map_index = 0; map_index < max_entries; map_index++) + outer_map_keys[map_index] = + ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) + ? 9 : 1000) - map_index; + + /* This condition is only meaningful for array of maps. + * + * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say + * max_entries is short for n, then outer_map_keys looks like: + * + * [n, n-1, ... 2, 1] + * + * We change it to + * + * [n, n-1, ... 2, 0] + * + * So it will leave key 1 as a hole. It will serve to test the + * correctness when batch on an array: a "non-exist" key might be + * actually allocated and returned from key iteration. 
+ */ + if (has_holes) + outer_map_keys[max_entries - 1]--; + + /* batch operation - map_update */ + ret = bpf_map_update_batch(outer_map_fd, outer_map_keys, + inner_map_fds, &max_entries, &opts); + CHECK(ret != 0, + "Failed to update the outer map batch ops", + "error=%s\n", strerror(errno)); + + /* batch operation - map_lookup */ + for (op_index = 0; op_index < 2; ++op_index) + fetch_and_validate(outer_map_fd, &opts, + batch_size[op_index], false, + has_holes); + + /* batch operation - map_lookup_delete */ + if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + fetch_and_validate(outer_map_fd, &opts, + max_entries, true /*delete*/, + has_holes); + + /* close all map fds */ + for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) + close(inner_map_fds[map_index]); + close(outer_map_fd); + + free(inner_map_fds); + free(outer_map_keys); +} + +void test_map_in_map_batch_ops_array(void) +{ + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); + printf("%s:PASS with inner ARRAY map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false); + printf("%s:PASS with inner HASH map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true); + printf("%s:PASS with inner ARRAY map with holes\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true); + printf("%s:PASS with inner HASH map with holes\n", __func__); +} + +void test_map_in_map_batch_ops_hash(void) +{ + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); + printf("%s:PASS with inner ARRAY map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false); + printf("%s:PASS with inner HASH map\n", __func__); +} diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c new file mode 100644 index 000000000000..1c7c04288eff --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include <errno.h> +#include <unistd.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <bpf_util.h> +#include <test_maps.h> + +#include "map_percpu_stats.skel.h" + +#define MAX_ENTRIES 16384 +#define MAX_ENTRIES_HASH_OF_MAPS 64 +#define N_THREADS 8 +#define MAX_MAP_KEY_SIZE 4 +#define PCPU_MIN_UNIT_SIZE 32768 + +static void map_info(int map_fd, struct bpf_map_info *info) +{ + __u32 len = sizeof(*info); + int ret; + + memset(info, 0, sizeof(*info)); + + ret = bpf_obj_get_info_by_fd(map_fd, info, &len); + CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno)); +} + +static const char *map_type_to_s(__u32 type) +{ + switch (type) { + case BPF_MAP_TYPE_HASH: + return "HASH"; + case BPF_MAP_TYPE_PERCPU_HASH: + return "PERCPU_HASH"; + case BPF_MAP_TYPE_LRU_HASH: + return "LRU_HASH"; + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + return "LRU_PERCPU_HASH"; + case BPF_MAP_TYPE_HASH_OF_MAPS: + return "BPF_MAP_TYPE_HASH_OF_MAPS"; + default: + return "<define-me>"; + } +} + +static __u32 map_count_elements(__u32 type, int map_fd) +{ + __u32 key = -1; + int n = 0; + + while (!bpf_map_get_next_key(map_fd, &key, &key)) + n++; + return n; +} + +#define BATCH true + +static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count) +{ + static __u8 values[(8 << 10) * MAX_ENTRIES]; + void *in_batch = NULL, *out_batch; + __u32 save_count = count; + int ret; + + ret = 
bpf_map_lookup_and_delete_batch(map_fd, + &in_batch, &out_batch, + keys, values, &count, + NULL); + + /* + * Despite what uapi header says, lookup_and_delete_batch will return + * -ENOENT in case we successfully have deleted all elements, so check + * this separately + */ + CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch", + "error: %s\n", strerror(errno)); + + CHECK(count != save_count, + "bpf_map_lookup_and_delete_batch", + "deleted not all elements: removed=%u expected=%u\n", + count, save_count); +} + +static void delete_all_elements(__u32 type, int map_fd, bool batch) +{ + static __u8 val[8 << 10]; /* enough for 1024 CPUs */ + __u32 key = -1; + void *keys; + __u32 i, n; + int ret; + + keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES); + CHECK(!keys, "calloc", "error: %s\n", strerror(errno)); + + for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++) + memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE); + + if (batch) { + /* Can't mix delete_batch and delete_and_lookup_batch because + * they have different semantics in relation to the keys + * argument. However, delete_batch utilize map_delete_elem, + * so we actually test it in non-batch scenario */ + delete_and_lookup_batch(map_fd, keys, n); + } else { + /* Intentionally mix delete and lookup_and_delete so we can test both */ + for (i = 0; i < n; i++) { + void *keyp = keys + i*MAX_MAP_KEY_SIZE; + + if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) { + ret = bpf_map_delete_elem(map_fd, keyp); + CHECK(ret < 0, "bpf_map_delete_elem", + "error: key %u: %s\n", i, strerror(errno)); + } else { + ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val); + CHECK(ret < 0, "bpf_map_lookup_and_delete_elem", + "error: key %u: %s\n", i, strerror(errno)); + } + } + } + + free(keys); +} + +static bool is_lru(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_LRU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + +struct upsert_opts { + __u32 map_type; + int map_fd; + __u32 n; + bool retry_for_nomem; +}; + +static int create_small_hash(void) +{ + int map_fd; + + map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL); + CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n", + strerror(errno), "small"); + + return map_fd; +} + +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + +static void *patch_map_thread(void *arg) +{ + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ + static __u8 blob[8 << 10]; + struct upsert_opts *opts = arg; + void *val_ptr; + int val; + int ret; + int i; + + for (i = 0; i < opts->n; i++) { + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + val = create_small_hash(); + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { + val = rand(); + val_ptr = &val; + } + + /* 2 seconds may be enough ? 
*/ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); + CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); + + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + close(val); + } + return NULL; +} + +static void upsert_elements(struct upsert_opts *opts) +{ + pthread_t threads[N_THREADS]; + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(threads); i++) { + ret = pthread_create(&i[threads], NULL, patch_map_thread, opts); + CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret)); + } + + for (i = 0; i < ARRAY_SIZE(threads); i++) { + ret = pthread_join(i[threads], NULL); + CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret)); + } +} + +static __u32 read_cur_elements(int iter_fd) +{ + char buf[64]; + ssize_t n; + __u32 ret; + + n = read(iter_fd, buf, sizeof(buf)-1); + CHECK(n <= 0, "read", "error: %s\n", strerror(errno)); + buf[n] = '\0'; + + errno = 0; + ret = (__u32)strtol(buf, NULL, 10); + CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno)); + + return ret; +} + +static __u32 get_cur_elements(int map_id) +{ + struct map_percpu_stats *skel; + struct bpf_link *link; + __u32 n_elements; + int iter_fd; + int ret; + + skel = map_percpu_stats__open(); + CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno)); + + skel->bss->target_id = map_id; + + ret = map_percpu_stats__load(skel); + CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno)); + + link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL); + CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno)); + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno)); + + n_elements = read_cur_elements(iter_fd); + + close(iter_fd); + bpf_link__destroy(link); + map_percpu_stats__destroy(skel); + + return n_elements; +} + +static void check_expected_number_elements(__u32 n_inserted, int map_fd, + struct bpf_map_info *info) +{ + __u32 n_real; + __u32 n_iter; + + /* Count the current number of elements in the map by iterating through + * all the map keys via bpf_get_next_key + */ + n_real = map_count_elements(info->type, map_fd); + + /* The "real" number of elements should be the same as the inserted + * number of elements in all cases except LRU maps, where some elements + * may have been evicted + */ + if (n_inserted == 0 || !is_lru(info->type)) + CHECK(n_inserted != n_real, "map_count_elements", + "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted); + + /* Count the current number of elements in the map using an iterator */ + n_iter = get_cur_elements(info->id); + + /* Both counts should be the same, as all updates are over */ + CHECK(n_iter != n_real, "get_cur_elements", + "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n", + n_iter, n_real, map_type_to_s(info->type), info->map_flags); +} + +static void __test(int map_fd) +{ + struct upsert_opts opts = { + .map_fd = map_fd, + }; + struct bpf_map_info info; + + map_info(map_fd, &info); + opts.map_type = info.type; + opts.n = info.max_entries; + + /* Reduce the number of elements we are updating such that we don't + * bump into -E2BIG from non-preallocated hash maps, but still will + * have some evictions for LRU maps */ + if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS) + opts.n -= 512; + else + opts.n /= 2; + + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * 
pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + + /* + * Upsert keys [0, n) under some competition: with random values from + * N_THREADS threads. Check values, then delete all elements and check + * values again. + */ + upsert_elements(&opts); + check_expected_number_elements(opts.n, map_fd, &info); + delete_all_elements(info.type, map_fd, !BATCH); + check_expected_number_elements(0, map_fd, &info); + + /* Now do the same, but using batch delete operations */ + upsert_elements(&opts); + check_expected_number_elements(opts.n, map_fd, &info); + delete_all_elements(info.type, map_fd, BATCH); + check_expected_number_elements(0, map_fd, &info); + + close(map_fd); +} + +static int map_create_opts(__u32 type, const char *name, + struct bpf_map_create_opts *map_opts, + __u32 key_size, __u32 val_size) +{ + int max_entries; + int map_fd; + + if (type == BPF_MAP_TYPE_HASH_OF_MAPS) + max_entries = MAX_ENTRIES_HASH_OF_MAPS; + else + max_entries = MAX_ENTRIES; + + map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts); + CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n", + strerror(errno), name); + + return map_fd; +} + +static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts) +{ + return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int)); +} + +static int create_hash(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC); + + return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts); +} + +static int create_percpu_hash(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC); + + return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts); +} + +static int create_hash_prealloc(void) +{ + return map_create(BPF_MAP_TYPE_HASH, "hash", NULL); +} + +static int create_percpu_hash_prealloc(void) +{ + return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL); +} + +static int create_lru_hash(__u32 type, __u32 map_flags) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags); + + return map_create(type, "lru_hash", &map_opts); +} + +static int create_hash_of_maps(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .map_flags = BPF_F_NO_PREALLOC, + .inner_map_fd = create_small_hash(), + ); + int ret; + + ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps", + &map_opts, sizeof(int), sizeof(int)); + close(map_opts.inner_map_fd); + return ret; +} + +static void map_percpu_stats_hash(void) +{ + __test(create_hash()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_percpu_hash(void) +{ + __test(create_percpu_hash()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_hash_prealloc(void) +{ + __test(create_hash_prealloc()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_percpu_hash_prealloc(void) +{ + __test(create_percpu_hash_prealloc()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_lru_hash(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0)); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_lru_hash_no_common(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU)); + printf("test_%s:PASS\n", __func__); +} + +static void 
map_percpu_stats_percpu_lru_hash(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0)); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_percpu_lru_hash_no_common(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU)); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_hash_of_maps(void) +{ + __test(create_hash_of_maps()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_map_value_size(void) +{ + int fd; + int value_sz = PCPU_MIN_UNIT_SIZE + 1; + struct bpf_map_create_opts opts = { .sz = sizeof(opts) }; + enum bpf_map_type map_types[] = { BPF_MAP_TYPE_PERCPU_ARRAY, + BPF_MAP_TYPE_PERCPU_HASH, + BPF_MAP_TYPE_LRU_PERCPU_HASH }; + for (int i = 0; i < ARRAY_SIZE(map_types); i++) { + fd = bpf_map_create(map_types[i], NULL, sizeof(__u32), value_sz, 1, &opts); + CHECK(fd < 0 && errno != E2BIG, "percpu map value size", + "error: %s\n", strerror(errno)); + } + printf("test_%s:PASS\n", __func__); +} + +void test_map_percpu_stats(void) +{ + map_percpu_stats_hash(); + map_percpu_stats_percpu_hash(); + map_percpu_stats_hash_prealloc(); + map_percpu_stats_percpu_hash_prealloc(); + map_percpu_stats_lru_hash(); + map_percpu_stats_lru_hash_no_common(); + map_percpu_stats_percpu_lru_hash(); + map_percpu_stats_percpu_lru_hash_no_common(); + map_percpu_stats_hash_of_maps(); + map_percpu_stats_map_value_size(); +} diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index e569edc679d8..af10c309359a 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -19,16 +19,12 @@ #include <test_btf.h> #include <test_maps.h> -static struct bpf_create_map_attr xattr = { - .name = "sk_storage_map", - .map_type = BPF_MAP_TYPE_SK_STORAGE, - .map_flags = BPF_F_NO_PREALLOC, - .max_entries = 0, - .key_size = 4, - .value_size = 8, +static struct bpf_map_create_opts map_opts = { + .sz = sizeof(map_opts), .btf_key_type_id = 1, .btf_value_type_id = 3, .btf_fd = -1, + .map_flags = BPF_F_NO_PREALLOC, }; static unsigned int nr_sk_threads_done; @@ -140,7 +136,7 @@ static int load_btf(void) memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), btf_str_sec, sizeof(btf_str_sec)); - return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); + return bpf_btf_load(raw_btf, sizeof(raw_btf), NULL); } static int create_sk_storage_map(void) @@ -150,13 +146,13 @@ static int create_sk_storage_map(void) btf_fd = load_btf(); CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", btf_fd, errno); - xattr.btf_fd = btf_fd; + map_opts.btf_fd = btf_fd; - map_fd = bpf_create_map_xattr(&xattr); - xattr.btf_fd = -1; + map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); + map_opts.btf_fd = -1; close(btf_fd); CHECK(map_fd == -1, - "bpf_create_map_xattr()", "errno:%d\n", errno); + "bpf_map_create()", "errno:%d\n", errno); return map_fd; } @@ -416,7 +412,7 @@ static void test_sk_storage_map_stress_free(void) rlim_new.rlim_max = rlim_new.rlim_cur + 128; err = setrlimit(RLIMIT_NOFILE, &rlim_new); CHECK(err, "setrlimit(RLIMIT_NOFILE)", "rlim_new:%lu errno:%d", - rlim_new.rlim_cur, errno); + (unsigned long) rlim_new.rlim_cur, errno); } err = do_sk_storage_map_stress_free(); @@ -462,21 +458,21 @@ static void test_sk_storage_map_basic(void) struct { int cnt; int lock; - } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; - struct bpf_create_map_attr bad_xattr; + } 
value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value; + struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; btf_fd = load_btf(); CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", btf_fd, errno); - xattr.btf_fd = btf_fd; + map_opts.btf_fd = btf_fd; sk_fd = socket(AF_INET6, SOCK_STREAM, 0); CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n", sk_fd, errno); - map_fd = bpf_create_map_xattr(&xattr); - CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)", + map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); + CHECK(map_fd == -1, "bpf_map_create(good_xattr)", "map_fd:%d errno:%d\n", map_fd, errno); /* Add new elem */ @@ -487,38 +483,41 @@ static void test_sk_storage_map_basic(void) "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST | BPF_F_LOCK); CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt and update with BPF_EXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST); CHECK(err, "bpf_map_update_elem(BPF_EXIST)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Update with BPF_NOEXIST */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_NOEXIST | BPF_F_LOCK); CHECK(!err || errno != EEXIST, @@ -530,22 +529,23 @@ static void test_sk_storage_map_basic(void) value.cnt -= 1; err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Bump the cnt again and update with map_flags == 0 */ value.cnt += 1; + value.lock = 2; err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value, BPF_F_LOCK); - CHECK(err || lookup_value.cnt != value.cnt, + CHECK(err || lookup_value.lock || 
lookup_value.cnt != value.cnt, "bpf_map_lookup_elem_flags(BPF_F_LOCK)", - "err:%d errno:%d cnt:%x(%x)\n", - err, errno, lookup_value.cnt, value.cnt); + "err:%d errno:%d lock:%x cnt:%x(%x)\n", + err, errno, lookup_value.lock, lookup_value.cnt, value.cnt); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &sk_fd); @@ -560,31 +560,29 @@ static void test_sk_storage_map_basic(void) CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); + memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); bad_xattr.btf_key_type_id = 0; - err = bpf_create_map_xattr(&bad_xattr); - CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); + CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); + memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); bad_xattr.btf_key_type_id = 3; - err = bpf_create_map_xattr(&bad_xattr); - CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); + CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); - bad_xattr.max_entries = 1; - err = bpf_create_map_xattr(&bad_xattr); - CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 1, &map_opts); + CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); + memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); bad_xattr.map_flags = 0; - err = bpf_create_map_xattr(&bad_xattr); + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", "err:%d errno:%d\n", err, errno); - xattr.btf_fd = -1; + map_opts.btf_fd = -1; close(btf_fd); close(map_fd); close(sk_fd); diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c new file mode 100644 index 000000000000..a4121d2248ac --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022. 
Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_util.h" +#include "test_maps.h" +#include "task_local_storage_helpers.h" +#include "read_bpf_task_storage_busy.skel.h" + +struct lookup_ctx { + bool start; + bool stop; + int pid_fd; + int map_fd; + int loop; +}; + +static void *lookup_fn(void *arg) +{ + struct lookup_ctx *ctx = arg; + long value; + int i = 0; + + while (!ctx->start) + usleep(1); + + while (!ctx->stop && i++ < ctx->loop) + bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value); + return NULL; +} + +static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr) +{ + unsigned int i; + + ctx->stop = true; + ctx->start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +void test_task_storage_map_stress_lookup(void) +{ +#define MAX_NR_THREAD 4096 + unsigned int i, nr = 256, loop = 8192, cpu = 0; + struct read_bpf_task_storage_busy *skel; + pthread_t tids[MAX_NR_THREAD]; + struct lookup_ctx ctx; + cpu_set_t old, new; + const char *cfg; + int err; + + cfg = getenv("TASK_STORAGE_MAP_NR_THREAD"); + if (cfg) { + nr = atoi(cfg); + if (nr > MAX_NR_THREAD) + nr = MAX_NR_THREAD; + } + cfg = getenv("TASK_STORAGE_MAP_NR_LOOP"); + if (cfg) + loop = atoi(cfg); + cfg = getenv("TASK_STORAGE_MAP_PIN_CPU"); + if (cfg) + cpu = atoi(cfg); + + skel = read_bpf_task_storage_busy__open_and_load(); + err = libbpf_get_error(skel); + CHECK(err, "open_and_load", "error %d\n", err); + + /* Only for a fully preemptible kernel */ + if (!skel->kconfig->CONFIG_PREEMPTION) { + printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__); + read_bpf_task_storage_busy__destroy(skel); + skips++; + return; + } + + /* Save the old affinity setting */ + sched_getaffinity(getpid(), sizeof(old), &old); + + /* Pinned on a specific CPU */ + CPU_ZERO(&new); + CPU_SET(cpu, &new); + sched_setaffinity(getpid(), sizeof(new), &new); + + ctx.start = false; + ctx.stop = false; + ctx.pid_fd = sys_pidfd_open(getpid(), 0); + ctx.map_fd = bpf_map__fd(skel->maps.task); + ctx.loop = loop; + for (i = 0; i < nr; i++) { + err = pthread_create(&tids[i], NULL, lookup_fn, &ctx); + if (err) { + abort_lookup(&ctx, tids, i); + CHECK(err, "pthread_create", "error %d\n", err); + goto out; + } + } + + ctx.start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); + + skel->bss->pid = getpid(); + err = read_bpf_task_storage_busy__attach(skel); + CHECK(err, "attach", "error %d\n", err); + + /* Trigger program */ + sys_gettid(); + skel->bss->pid = 0; + + CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); +out: + read_bpf_task_storage_busy__destroy(skel); + /* Restore affinity setting */ + sched_setaffinity(getpid(), sizeof(old), &old); + printf("%s:PASS\n", __func__); +} |
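
For readers unfamiliar with the batch API these selftests exercise, here is a minimal standalone sketch (editor's illustration, not part of any patch above) of the lookup-batch iteration loop used throughout: populate a map with bpf_map_update_batch(), then walk it with bpf_map_lookup_batch() until -ENOENT signals the end of iteration. The map name, step size and entry count are made up for the example; it needs root/CAP_BPF and the bpf_map_create()-era libbpf API used in these patches.

	/* Editor's sketch: basic batch update + batch lookup loop. */
	#include <errno.h>
	#include <stdio.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	int main(void)
	{
		const __u32 max_entries = 64;
		__u32 keys[64], values[64], fetched_keys[64], fetched_vals[64];
		__u32 count, total = 0;
		__u64 batch = 0;			/* opaque iteration token */
		LIBBPF_OPTS(bpf_map_batch_opts, opts);
		int fd, err;

		fd = bpf_map_create(BPF_MAP_TYPE_HASH, "sketch_map",
				    sizeof(__u32), sizeof(__u32), max_entries, NULL);
		if (fd < 0)
			return 1;

		for (__u32 i = 0; i < max_entries; i++) {
			keys[i] = i;
			values[i] = i + 1;
		}
		count = max_entries;
		if (bpf_map_update_batch(fd, keys, values, &count, &opts))
			return 1;

		/* A hash map may return fewer entries than requested per call;
		 * the final call reports -ENOENT once iteration is complete.
		 */
		while (total < max_entries) {
			count = 16;	/* step size, illustrative */
			err = bpf_map_lookup_batch(fd, total ? &batch : NULL, &batch,
						   fetched_keys + total,
						   fetched_vals + total,
						   &count, &opts);
			if (err && errno != ENOENT)
				return 1;
			total += count;
			if (err)
				break;
		}
		printf("fetched %u of %u entries\n", total, max_entries);
		return 0;
	}

The selftests above layer extra handling on top of this loop: fetch_and_validate() restarts with a larger step when the kernel reports -ENOSPC, and the per-cpu variants size each value as value_size * libbpf_num_possible_cpus().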