aboutsummaryrefslogtreecommitdiffstats
path: root/tools/lib/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lib/bpf')
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile9
-rw-r--r--tools/lib/bpf/README.rst15
-rw-r--r--tools/lib/bpf/btf.c76
-rw-r--r--tools/lib/bpf/btf.h3
-rw-r--r--tools/lib/bpf/libbpf.c8
-rw-r--r--tools/lib/bpf/libbpf.h3
-rw-r--r--tools/lib/bpf/libbpf.map6
-rw-r--r--tools/lib/bpf/xsk.c723
-rw-r--r--tools/lib/bpf/xsk.h203
10 files changed, 1016 insertions, 32 deletions
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index bfd9bfc82c3b..ee9d5362f35b 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1 +1 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 847916273696..a05c43468bd0 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -132,9 +132,9 @@ BPF_IN := $(OUTPUT)libbpf-in.o
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
VERSION_SCRIPT := libbpf.map
-GLOBAL_SYM_COUNT = $(shell readelf -s $(BPF_IN) | \
+GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}')
-VERSIONED_SYM_COUNT = $(shell readelf -s $(OUTPUT)libbpf.so | \
+VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_FILE)
@@ -164,6 +164,9 @@ $(BPF_IN): force elfdep bpfdep
@(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \
(diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true
+ @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \
+ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \
+ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
$(Q)$(MAKE) $(build)=libbpf
$(OUTPUT)libbpf.so: $(BPF_IN)
@@ -174,7 +177,7 @@ $(OUTPUT)libbpf.a: $(BPF_IN)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
- $(QUIET_LINK)$(CXX) $^ -lelf -o $@
+ $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@
check: check_abi
diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst
index 607aae40f4ed..5788479384ca 100644
--- a/tools/lib/bpf/README.rst
+++ b/tools/lib/bpf/README.rst
@@ -9,7 +9,7 @@ described here. It's recommended to follow these conventions whenever a
new function or type is added to keep libbpf API clean and consistent.
All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``.
System call wrappers
--------------------
@@ -62,6 +62,19 @@ Auxiliary functions and types that don't fit well in any of categories
described above should have ``libbpf_`` prefix, e.g.
``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
+AF_XDP functions
+-------------------
+
+AF_XDP functions should have an ``xsk_`` prefix, e.g.
+``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
+of both low-level ring access functions and high-level configuration
+functions. These can be mixed and matched. Note that these functions
+are not reentrant for performance reasons.
+
+Please take a look at Documentation/networking/af_xdp.rst in the Linux
+kernel source tree on how to use XDP sockets and for some common
+mistakes in case you do not get any traffic up to user space.
+
libbpf ABI
==========
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 68b50e9bbde1..1b8d8cdd3575 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1070,8 +1070,8 @@ done:
return err;
}
-#define BTF_DEDUP_TABLE_SIZE_LOG 14
-#define BTF_DEDUP_TABLE_MOD ((1 << BTF_DEDUP_TABLE_SIZE_LOG) - 1)
+#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14)
+#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31
#define BTF_UNPROCESSED_ID ((__u32)-1)
#define BTF_IN_PROGRESS_ID ((__u32)-2)
@@ -1128,18 +1128,21 @@ static inline __u32 hash_combine(__u32 h, __u32 value)
#undef GOLDEN_RATIO_PRIME
}
-#define for_each_hash_node(table, hash, node) \
- for (node = table[hash & BTF_DEDUP_TABLE_MOD]; node; node = node->next)
+#define for_each_dedup_cand(d, hash, node) \
+ for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \
+ node; \
+ node = node->next)
static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id)
{
struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node));
+ int bucket = hash & (d->opts.dedup_table_size - 1);
if (!node)
return -ENOMEM;
node->type_id = type_id;
- node->next = d->dedup_table[hash & BTF_DEDUP_TABLE_MOD];
- d->dedup_table[hash & BTF_DEDUP_TABLE_MOD] = node;
+ node->next = d->dedup_table[bucket];
+ d->dedup_table[bucket] = node;
return 0;
}
@@ -1177,7 +1180,7 @@ static void btf_dedup_table_free(struct btf_dedup *d)
if (!d->dedup_table)
return;
- for (i = 0; i < (1 << BTF_DEDUP_TABLE_SIZE_LOG); i++) {
+ for (i = 0; i < d->opts.dedup_table_size; i++) {
while (d->dedup_table[i]) {
tmp = d->dedup_table[i];
d->dedup_table[i] = tmp->next;
@@ -1212,19 +1215,37 @@ static void btf_dedup_free(struct btf_dedup *d)
free(d);
}
+/*
+ * Round size up to the nearest power of two, but never return more than
+ * 2^max_size_log. A size of 0 or 1 yields 1.
+ */
+static __u32 roundup_pow2_max(__u32 size, int max_size_log)
+{
+	int log = 0;
+
+	while (log < max_size_log && (1U << log) < size)
+		log++;
+
+	return 1U << log;
+}
+
+
static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
const struct btf_dedup_opts *opts)
{
struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
int i, err = 0;
+ __u32 sz;
if (!d)
return ERR_PTR(-ENOMEM);
+ d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
+ sz = opts && opts->dedup_table_size ? opts->dedup_table_size
+ : BTF_DEDUP_TABLE_DEFAULT_SIZE;
+ sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG);
+ d->opts.dedup_table_size = sz;
+
d->btf = btf;
d->btf_ext = btf_ext;
- d->dedup_table = calloc(1 << BTF_DEDUP_TABLE_SIZE_LOG,
+ d->dedup_table = calloc(d->opts.dedup_table_size,
sizeof(struct btf_dedup_node *));
if (!d->dedup_table) {
err = -ENOMEM;
@@ -1249,8 +1270,6 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
for (i = 0; i <= btf->nr_types; i++)
d->hypot_map[i] = BTF_UNPROCESSED_ID;
- d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
-
done:
if (err) {
btf_dedup_free(d);
@@ -1644,7 +1663,7 @@ static __u32 btf_hash_struct(struct btf_type *t)
* IDs. This check is performed during type graph equivalence check and
* referenced types equivalence is checked separately.
*/
-static bool btf_equal_struct(struct btf_type *t1, struct btf_type *t2)
+static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
{
struct btf_member *m1, *m2;
__u16 vlen;
@@ -1824,7 +1843,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_INT:
h = btf_hash_int(t);
- for_each_hash_node(d->dedup_table, h, cand_node) {
+ for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
if (btf_equal_int(t, cand)) {
new_id = cand_node->type_id;
@@ -1835,7 +1854,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_ENUM:
h = btf_hash_enum(t);
- for_each_hash_node(d->dedup_table, h, cand_node) {
+ for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
if (btf_equal_enum(t, cand)) {
new_id = cand_node->type_id;
@@ -1846,7 +1865,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_FWD:
h = btf_hash_common(t);
- for_each_hash_node(d->dedup_table, h, cand_node) {
+ for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
if (btf_equal_common(t, cand)) {
new_id = cand_node->type_id;
@@ -2105,7 +2124,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
struct btf_member *cand_m, *canon_m;
__u16 vlen;
- if (!btf_equal_struct(cand_type, canon_type))
+ if (!btf_shallow_equal_struct(cand_type, canon_type))
return 0;
vlen = BTF_INFO_VLEN(cand_type->info);
cand_m = (struct btf_member *)(cand_type + 1);
@@ -2246,7 +2265,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
{
struct btf_dedup_node *cand_node;
- struct btf_type *t;
+ struct btf_type *cand_type, *t;
/* if we don't find equivalent type, then we are canonical */
__u32 new_id = type_id;
__u16 kind;
@@ -2263,9 +2282,23 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
return 0;
h = btf_hash_struct(t);
- for_each_hash_node(d->dedup_table, h, cand_node) {
+ for_each_dedup_cand(d, h, cand_node) {
int eq;
+ /*
+ * Even though btf_dedup_is_equiv() checks for
+ * btf_shallow_equal_struct() internally when checking two
+ * structs (unions) for equivalence, we need to guard here
+ * from picking matching FWD type as a dedup candidate.
+ * This can happen due to hash collision. In such case just
+ * relying on btf_dedup_is_equiv() would lead to potentially
+ * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
+ * FWD and compatible STRUCT/UNION are considered equivalent.
+ */
+ cand_type = d->btf->types[cand_node->type_id];
+ if (!btf_shallow_equal_struct(t, cand_type))
+ continue;
+
btf_dedup_clear_hypot_map(d);
eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id);
if (eq < 0)
@@ -2326,7 +2359,8 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
struct btf_type *t, *cand;
/* if we don't find equivalent type, then we are representative type */
__u32 new_id = type_id;
- __u32 h, ref_type_id;
+ int ref_type_id;
+ __u32 h;
if (d->map[type_id] == BTF_IN_PROGRESS_ID)
return -ELOOP;
@@ -2349,7 +2383,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
t->type = ref_type_id;
h = btf_hash_common(t);
- for_each_hash_node(d->dedup_table, h, cand_node) {
+ for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
if (btf_equal_common(t, cand)) {
new_id = cand_node->type_id;
@@ -2372,7 +2406,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
info->index_type = ref_type_id;
h = btf_hash_array(t);
- for_each_hash_node(d->dedup_table, h, cand_node) {
+ for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
if (btf_equal_array(t, cand)) {
new_id = cand_node->type_id;
@@ -2403,7 +2437,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
}
h = btf_hash_fnproto(t);
- for_each_hash_node(d->dedup_table, h, cand_node) {
+ for_each_dedup_cand(d, h, cand_node) {
cand = d->btf->types[cand_node->type_id];
if (btf_equal_fnproto(t, cand)) {
new_id = cand_node->type_id;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 94bbc249b0f1..28a1e1e59861 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -76,7 +76,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
-LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext* btf_ext,
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
__u32 *size);
LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
const struct btf_ext *btf_ext,
@@ -90,6 +90,7 @@ LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
struct btf_dedup_opts {
+ unsigned int dedup_table_size;
bool dont_resolve_fwds;
};
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b38dcbe7460a..f5eb60379c8d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2100,7 +2100,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
if (err)
return err;
- bpf_map__for_each(map, obj) {
+ bpf_object__for_each_map(map, obj) {
char buf[PATH_MAX];
int len;
@@ -2147,7 +2147,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
if (!obj)
return -ENOENT;
- bpf_map__for_each(map, obj) {
+ bpf_object__for_each_map(map, obj) {
char buf[PATH_MAX];
int len;
@@ -2835,7 +2835,7 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
{
struct bpf_map *pos;
- bpf_map__for_each(pos, obj) {
+ bpf_object__for_each_map(pos, obj) {
if (pos->name && !strcmp(pos->name, name))
return pos;
}
@@ -2928,7 +2928,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
first_prog = prog;
}
- bpf_map__for_each(map, obj) {
+ bpf_object__for_each_map(map, obj) {
if (!bpf_map__is_offload_neutral(map))
map->map_ifindex = attr->ifindex;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6c0168f8bba5..b4652aa1a58a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -278,10 +278,11 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
LIBBPF_API struct bpf_map *
bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
-#define bpf_map__for_each(pos, obj) \
+#define bpf_object__for_each_map(pos, obj) \
for ((pos) = bpf_map__next(NULL, (obj)); \
(pos) != NULL; \
(pos) = bpf_map__next((pos), (obj)))
+#define bpf_map__for_each bpf_object__for_each_map
LIBBPF_API struct bpf_map *
bpf_map__prev(struct bpf_map *map, struct bpf_object *obj);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 99dfa710c818..778a26702a70 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -147,4 +147,10 @@ LIBBPF_0.0.2 {
btf_ext__new;
btf_ext__reloc_func_info;
btf_ext__reloc_line_info;
+ xsk_umem__create;
+ xsk_socket__create;
+ xsk_umem__delete;
+ xsk_socket__delete;
+ xsk_umem__fd;
+ xsk_socket__fd;
} LIBBPF_0.0.1;
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
new file mode 100644
index 000000000000..f98ac82c9aea
--- /dev/null
+++ b/tools/lib/bpf/xsk.c
@@ -0,0 +1,723 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_util.h"
+#include "xsk.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+/* Library-side state for one registered UMEM. Allocated by
+ * xsk_umem__create() and freed by xsk_umem__delete().
+ */
+struct xsk_umem {
+ /* caller-provided fill ring struct, filled in by xsk_umem__create() */
+ struct xsk_ring_prod *fill;
+ /* caller-provided completion ring struct, filled in by xsk_umem__create() */
+ struct xsk_ring_cons *comp;
+ /* start of the caller-provided packet buffer area */
+ char *umem_area;
+ /* effective configuration (user supplied or defaults) */
+ struct xsk_umem_config config;
+ /* AF_XDP socket the UMEM is registered on */
+ int fd;
+ /* number of xsk_socket objects using this umem; incremented in
+ * xsk_socket__create(), decremented in xsk_socket__delete(), and
+ * xsk_umem__delete() returns -EBUSY while it is non-zero
+ */
+ int refcount;
+};
+
+/* Library-side state for one AF_XDP socket. Allocated by
+ * xsk_socket__create() and freed by xsk_socket__delete().
+ */
+struct xsk_socket {
+ /* caller-provided RX ring struct, filled in by xsk_socket__create() */
+ struct xsk_ring_cons *rx;
+ /* caller-provided TX ring struct, filled in by xsk_socket__create() */
+ struct xsk_ring_prod *tx;
+ /* set to 0 at creation; not otherwise used in this file */
+ __u64 outstanding_tx;
+ /* umem this socket is attached to */
+ struct xsk_umem *umem;
+ /* effective configuration (user supplied or defaults) */
+ struct xsk_socket_config config;
+ /* socket fd; the umem's fd for the first socket on a umem */
+ int fd;
+ /* NOTE(review): not referenced anywhere in this file -- confirm it is needed */
+ int xsks_map;
+ /* interface index resolved from ifname with if_nametoindex() */
+ int ifindex;
+ /* fd of the XDP program, loaded here or looked up by id */
+ int prog_fd;
+ /* fd of the "qidconf_map" array map */
+ int qidconf_map_fd;
+ /* fd of the "xsks_map" XSKMAP */
+ int xsks_map_fd;
+ /* queue the socket is bound to; also the key used in both maps */
+ __u32 queue_id;
+ /* interface name as passed to xsk_socket__create() */
+ char ifname[IFNAMSIZ];
+};
+
+/* NOTE(review): this struct is not referenced by any code visible in this
+ * file; presumably netlink query state -- confirm it is still needed.
+ */
+struct xsk_nl_info {
+ bool xdp_prog_attached;
+ int ifindex;
+ int fd;
+};
+
+/* The ring offsets handed to mmap are 64-bit values. Where the mmap2
+ * syscall exists (32-bit systems) it is invoked directly, with the offset
+ * converted to page units, because glibc does not export a wrapper for it.
+ * Everywhere else plain mmap() is used.
+ */
+static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags,
+			     int fd, __u64 offset)
+{
+#ifndef __NR_mmap2
+	return mmap(addr, length, prot, flags, fd, offset);
+#else
+	unsigned int shift = __builtin_ffs(getpagesize()) - 1;
+	off_t pgoff = (off_t)(offset >> shift);
+	long res;
+
+	res = syscall(__NR_mmap2, addr, length, prot, flags, fd, pgoff);
+	return (void *)res;
+#endif
+}
+
+/* Return the file descriptor backing @umem, or -EINVAL if @umem is NULL. */
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+	if (!umem)
+		return -EINVAL;
+
+	return umem->fd;
+}
+
+/* Return the file descriptor of @xsk, or -EINVAL if @xsk is NULL. */
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+	if (!xsk)
+		return -EINVAL;
+
+	return xsk->fd;
+}
+
+/* True when @buffer starts on a page boundary (per getpagesize()). */
+static bool xsk_page_aligned(void *buffer)
+{
+	unsigned long page_mask = getpagesize() - 1;
+
+	return ((unsigned long)buffer & page_mask) == 0;
+}
+
+/* Fill @cfg from @usr_cfg field by field; a NULL @usr_cfg selects the
+ * library defaults for every field.
+ */
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+				const struct xsk_umem_config *usr_cfg)
+{
+	cfg->fill_size = usr_cfg ? usr_cfg->fill_size :
+				   XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	cfg->comp_size = usr_cfg ? usr_cfg->comp_size :
+				   XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg->frame_size = usr_cfg ? usr_cfg->frame_size :
+				    XSK_UMEM__DEFAULT_FRAME_SIZE;
+	cfg->frame_headroom = usr_cfg ? usr_cfg->frame_headroom :
+					XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+}
+
+/* Fill @cfg from @usr_cfg field by field; a NULL @usr_cfg selects default
+ * ring sizes and zero for all flag fields.
+ */
+static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+				      const struct xsk_socket_config *usr_cfg)
+{
+	cfg->rx_size = usr_cfg ? usr_cfg->rx_size :
+				 XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg->tx_size = usr_cfg ? usr_cfg->tx_size :
+				 XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	cfg->libbpf_flags = usr_cfg ? usr_cfg->libbpf_flags : 0;
+	cfg->xdp_flags = usr_cfg ? usr_cfg->xdp_flags : 0;
+	cfg->bind_flags = usr_cfg ? usr_cfg->bind_flags : 0;
+}
+
+/* Create a UMEM: open an AF_XDP socket, register @umem_area (@size bytes)
+ * on it, size the fill and completion rings, and mmap both rings into the
+ * caller-provided @fill and @comp structs.
+ *
+ * @usr_config may be NULL, in which case library defaults are used.
+ *
+ * Returns 0 and stores the new object in *@umem_ptr on success. On failure
+ * returns -EFAULT for NULL arguments, -EINVAL for an empty or non page
+ * aligned area, -ENOMEM, or the negated errno of the failing system call;
+ * nothing is left allocated, open or mapped.
+ */
+int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size,
+		     struct xsk_ring_prod *fill, struct xsk_ring_cons *comp,
+		     const struct xsk_umem_config *usr_config)
+{
+	struct xdp_mmap_offsets off;
+	/* Zeroed so that no stack garbage is handed to the kernel. */
+	struct xdp_umem_reg mr = {};
+	struct xsk_umem *umem;
+	size_t fill_len, comp_len;
+	void *fill_map, *comp_map;
+	socklen_t optlen;
+	int err;
+
+	if (!umem_area || !umem_ptr || !fill || !comp)
+		return -EFAULT;
+	/* Either condition alone makes the area unusable. */
+	if (!size || !xsk_page_aligned(umem_area))
+		return -EINVAL;
+
+	umem = calloc(1, sizeof(*umem));
+	if (!umem)
+		return -ENOMEM;
+
+	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
+	if (umem->fd < 0) {
+		err = -errno;
+		goto out_umem_alloc;
+	}
+
+	umem->umem_area = umem_area;
+	xsk_set_umem_config(&umem->config, usr_config);
+
+	mr.addr = (uintptr_t)umem_area;
+	mr.len = size;
+	mr.chunk_size = umem->config.frame_size;
+	mr.headroom = umem->config.frame_headroom;
+
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
+			 &umem->config.fill_size,
+			 sizeof(umem->config.fill_size));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+			 &umem->config.comp_size,
+			 sizeof(umem->config.comp_size));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	optlen = sizeof(off);
+	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	fill_len = off.fr.desc + umem->config.fill_size * sizeof(__u64);
+	fill_map = xsk_mmap(NULL, fill_len, PROT_READ | PROT_WRITE,
+			    MAP_SHARED | MAP_POPULATE,
+			    umem->fd, XDP_UMEM_PGOFF_FILL_RING);
+	if (fill_map == MAP_FAILED) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	umem->fill = fill;
+	fill->mask = umem->config.fill_size - 1;
+	fill->size = umem->config.fill_size;
+	fill->producer = fill_map + off.fr.producer;
+	fill->consumer = fill_map + off.fr.consumer;
+	fill->ring = fill_map + off.fr.desc;
+	/* Producer rings keep cached_cons one ring size ahead. */
+	fill->cached_cons = umem->config.fill_size;
+
+	comp_len = off.cr.desc + umem->config.comp_size * sizeof(__u64);
+	comp_map = xsk_mmap(NULL, comp_len, PROT_READ | PROT_WRITE,
+			    MAP_SHARED | MAP_POPULATE,
+			    umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING);
+	if (comp_map == MAP_FAILED) {
+		err = -errno;
+		goto out_mmap;
+	}
+
+	umem->comp = comp;
+	comp->mask = umem->config.comp_size - 1;
+	comp->size = umem->config.comp_size;
+	comp->producer = comp_map + off.cr.producer;
+	comp->consumer = comp_map + off.cr.consumer;
+	comp->ring = comp_map + off.cr.desc;
+
+	*umem_ptr = umem;
+	return 0;
+
+out_mmap:
+	/* Unmap the fill ring mapping itself, not the caller's ring struct. */
+	munmap(fill_map, fill_len);
+out_socket:
+	close(umem->fd);
+out_umem_alloc:
+	free(umem);
+	return err;
+}
+
+/* Load the built-in XDP redirect program (hand-assembled below) and attach
+ * it to xsk->ifindex using xsk->config.xdp_flags. The program refers to the
+ * maps through xsk->qidconf_map_fd and xsk->xsks_map_fd, so both must be
+ * valid before this is called.
+ *
+ * On success stores the program fd in xsk->prog_fd and returns 0. On
+ * failure returns the (negative) result of the failing load or attach call;
+ * a program that was loaded but could not be attached is closed again.
+ */
+static int xsk_load_xdp_prog(struct xsk_socket *xsk)
+{
+ char bpf_log_buf[BPF_LOG_BUF_SIZE];
+ int err, prog_fd;
+
+ /* This is the C-program:
+ * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+ * {
+ * int *qidconf, index = ctx->rx_queue_index;
+ *
+ * // A set entry here means that the corresponding queue_id
+ * // has an active AF_XDP socket bound to it.
+ * qidconf = bpf_map_lookup_elem(&qidconf_map, &index);
+ * if (!qidconf)
+ * return XDP_ABORTED;
+ *
+ * if (*qidconf)
+ * return bpf_redirect_map(&xsks_map, index, 0);
+ *
+ * return XDP_PASS;
+ * }
+ */
+ struct bpf_insn prog[] = {
+ /* r1 = *(u32 *)(r1 + 16) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16),
+ /* *(u32 *)(r10 - 4) = r1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4),
+ /* r2 = r10 - 4, i.e. the address of the index stored above */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ /* r1 = qidconf_map; this macro expands to two instructions */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ /* r1 = lookup result, r0 = 0 (return value of the !qidconf path) */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ /* if r1 == 0 goto +8 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
+ /* r0 = 2 (return value of the final "return XDP_PASS" path) */
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ /* r1 = *(u32 *)(r1 + 0) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+ /* if r1 == 0 goto +5 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+ /* r1 = xsks_map; this macro expands to two instructions */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ /* r2 = *(u32 *)(r10 - 4) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
+ /* r3 = 0 (flags argument) */
+ BPF_MOV32_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+ /* The jumps are to this instruction */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+ prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
+ "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf,
+ BPF_LOG_BUF_SIZE);
+ if (prog_fd < 0) {
+ pr_warning("BPF log buffer:\n%s", bpf_log_buf);
+ return prog_fd;
+ }
+
+ err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
+ if (err) {
+ /* attach failed: do not leak the loaded program */
+ close(prog_fd);
+ return err;
+ }
+
+ xsk->prog_fd = prog_fd;
+ return 0;
+}
+
+/* Ask the driver of xsk->ifname how many combined channels it supports
+ * (ETHTOOL_GCHANNELS).
+ *
+ * Returns the maximum number of combined channels, 1 if the driver does
+ * not support the query or reports zero channels, or a negative errno if
+ * the helper socket or the ioctl fails for any other reason.
+ */
+static int xsk_get_max_queues(struct xsk_socket *xsk)
+{
+	/* Zero-initialised so no stack garbage is passed to the kernel and
+	 * max_combined is well defined even if the driver fills in nothing.
+	 */
+	struct ethtool_channels channels = {};
+	struct ifreq ifr = {};
+	int fd, err, ret;
+
+	fd = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -errno;
+
+	channels.cmd = ETHTOOL_GCHANNELS;
+	ifr.ifr_data = (void *)&channels;
+	/* strncpy() does not terminate on truncation, so do it explicitly. */
+	strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+	err = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (err && errno != EOPNOTSUPP) {
+		ret = -errno;
+		goto out;
+	}
+
+	/* errno is only meaningful when the ioctl failed, so test err rather
+	 * than a possibly stale errno value.
+	 */
+	if (err || channels.max_combined == 0)
+		/* If the device says it has no channels, then all traffic
+		 * is sent to a single stream, so max queues = 1.
+		 */
+		ret = 1;
+	else
+		ret = channels.max_combined;
+
+out:
+	close(fd);
+	return ret;
+}
+
+/* Create the two maps used by the built-in XDP program, "qidconf_map"
+ * (array) and "xsks_map" (XSKMAP), each with one int-sized entry per
+ * queue reported by xsk_get_max_queues(). The fds are stored in @xsk.
+ *
+ * Returns 0 on success or the negative value returned by the failing
+ * helper; if the second map cannot be created the first one is closed.
+ */
+static int xsk_create_bpf_maps(struct xsk_socket *xsk)
+{
+	int nr_queues, map_fd;
+
+	nr_queues = xsk_get_max_queues(xsk);
+	if (nr_queues < 0)
+		return nr_queues;
+
+	map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map",
+				     sizeof(int), sizeof(int), nr_queues, 0);
+	if (map_fd < 0)
+		return map_fd;
+	xsk->qidconf_map_fd = map_fd;
+
+	map_fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
+				     sizeof(int), sizeof(int), nr_queues, 0);
+	if (map_fd >= 0) {
+		xsk->xsks_map_fd = map_fd;
+		return 0;
+	}
+
+	close(xsk->qidconf_map_fd);
+	return map_fd;
+}
+
+/* Close both map fds stored in @xsk, qidconf_map first, then xsks_map. */
+static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
+{
+	int map_fds[] = { xsk->qidconf_map_fd, xsk->xsks_map_fd };
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(map_fds) / sizeof(map_fds[0]); idx++)
+		close(map_fds[idx]);
+}
+
+/* Write @qidconf_value and @xsks_value at index xsk->queue_id into the
+ * "qidconf_map" and "xsks_map" maps of the program behind xsk->prog_fd.
+ *
+ * The maps are located by name among the maps the program references. All
+ * map fds opened here are temporary: each one is closed before returning,
+ * and the fds stored in @xsk are left untouched.
+ *
+ * Returns 0 when both maps were updated. Otherwise returns a negative
+ * error (-ENOENT if either map is missing) after resetting any entry that
+ * had already been written back to 0.
+ */
+static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value,
+			       int xsks_value)
+{
+	struct bpf_prog_info prog_info = {};
+	__u32 prog_len = sizeof(prog_info);
+	struct bpf_map_info map_info;
+	__u32 map_len = sizeof(map_info);
+	/* -1 means "not found / not updated yet" */
+	int qidconf_fd = -1, xsks_fd = -1;
+	__u32 *map_ids;
+	int reset_value = 0;
+	__u32 num_maps;
+	unsigned int i;
+	int err;
+
+	/* First query only learns how many maps the program uses. */
+	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+	if (err)
+		return err;
+
+	num_maps = prog_info.nr_map_ids;
+
+	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
+	if (!map_ids)
+		return -ENOMEM;
+
+	/* Second query fetches the map ids into the array. */
+	memset(&prog_info, 0, prog_len);
+	prog_info.nr_map_ids = num_maps;
+	prog_info.map_ids = (__u64)(unsigned long)map_ids;
+
+	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+	if (err)
+		goto out_map_ids;
+
+	for (i = 0; i < prog_info.nr_map_ids; i++) {
+		int fd;
+
+		fd = bpf_map_get_fd_by_id(map_ids[i]);
+		if (fd < 0) {
+			err = -errno;
+			goto out_maps;
+		}
+
+		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
+		if (err) {
+			close(fd);
+			goto out_maps;
+		}
+
+		if (!strcmp(map_info.name, "qidconf_map")) {
+			err = bpf_map_update_elem(fd, &xsk->queue_id,
+						  &qidconf_value, 0);
+			if (err) {
+				close(fd);
+				goto out_maps;
+			}
+			qidconf_fd = fd;
+		} else if (!strcmp(map_info.name, "xsks_map")) {
+			err = bpf_map_update_elem(fd, &xsk->queue_id,
+						  &xsks_value, 0);
+			if (err) {
+				close(fd);
+				goto out_maps;
+			}
+			xsks_fd = fd;
+		} else {
+			/* Unrelated map: do not leak its fd. */
+			close(fd);
+		}
+
+		if (qidconf_fd >= 0 && xsks_fd >= 0)
+			break;
+	}
+
+	if (qidconf_fd < 0 || xsks_fd < 0) {
+		err = -ENOENT;
+		goto out_maps;
+	}
+
+	err = 0;
+	goto out_close;
+
+out_maps:
+	/* Best effort roll-back of whatever was already written. */
+	if (qidconf_fd >= 0)
+		(void)bpf_map_update_elem(qidconf_fd, &xsk->queue_id,
+					  &reset_value, 0);
+	if (xsks_fd >= 0)
+		(void)bpf_map_update_elem(xsks_fd, &xsk->queue_id,
+					  &reset_value, 0);
+out_close:
+	if (qidconf_fd >= 0)
+		close(qidconf_fd);
+	if (xsks_fd >= 0)
+		close(xsks_fd);
+out_map_ids:
+	free(map_ids);
+	return err;
+}
+
+/* Make sure an XDP program is attached to xsk->ifindex and register this
+ * socket in its maps.
+ *
+ * If no program is attached yet, the maps are created and the built-in
+ * program is loaded and attached. Otherwise an fd to the already attached
+ * program is obtained by id. In both cases the maps of that program are
+ * then updated for xsk->queue_id.
+ *
+ * Returns 0 on success or a negative error. On failure the program fd
+ * obtained here is closed, and the maps are closed if they were created
+ * here.
+ */
+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+{
+	/* true when this call created the maps and attached the program */
+	bool prog_attached = false;
+	__u32 prog_id = 0;
+	int err;
+
+	err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
+				  xsk->config.xdp_flags);
+	if (err)
+		return err;
+
+	if (!prog_id) {
+		prog_attached = true;
+		err = xsk_create_bpf_maps(xsk);
+		if (err)
+			return err;
+
+		err = xsk_load_xdp_prog(xsk);
+		if (err)
+			goto out_maps;
+	} else {
+		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+		/* Do not carry on with an invalid program fd. */
+		if (xsk->prog_fd < 0)
+			return -errno;
+	}
+
+	err = xsk_update_bpf_maps(xsk, true, xsk->fd);
+	if (err)
+		goto out_load;
+
+	return 0;
+
+out_load:
+	/* The fd is owned by this function in both branches above. */
+	close(xsk->prog_fd);
+out_maps:
+	if (prog_attached)
+		xsk_delete_bpf_maps(xsk);
+	return err;
+}
+
+/* Create an AF_XDP socket on queue @queue_id of interface @ifname, using
+ * @umem for packet buffers, and map its RX and TX rings into the
+ * caller-provided @rx and @tx structs.
+ *
+ * @usr_config may be NULL, in which case library defaults are used. Unless
+ * XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD is set in the config, an XDP program
+ * is set up on the interface as well.
+ *
+ * Returns 0 and stores the new object in *@xsk_ptr on success. On failure
+ * returns -EFAULT for NULL arguments, -EBUSY if @umem is already in use by
+ * another socket, -ENOMEM, or the negative error of the failing call; the
+ * ring mappings made here are unmapped again and the umem refcount is
+ * restored.
+ */
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+		       __u32 queue_id, struct xsk_umem *umem,
+		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+		       const struct xsk_socket_config *usr_config)
+{
+	void *rx_map = NULL, *tx_map = NULL;
+	struct sockaddr_xdp sxdp = {};
+	struct xdp_mmap_offsets off;
+	struct xsk_socket *xsk;
+	socklen_t optlen;
+	int err;
+
+	if (!umem || !xsk_ptr || !rx || !tx || !ifname)
+		return -EFAULT;
+
+	if (umem->refcount) {
+		pr_warning("Error: shared umems not supported by libbpf.\n");
+		return -EBUSY;
+	}
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return -ENOMEM;
+
+	/* The first socket on a umem reuses the umem's own fd. */
+	if (umem->refcount++ > 0) {
+		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
+		if (xsk->fd < 0) {
+			err = -errno;
+			goto out_xsk_alloc;
+		}
+	} else {
+		xsk->fd = umem->fd;
+	}
+
+	xsk->outstanding_tx = 0;
+	xsk->queue_id = queue_id;
+	xsk->umem = umem;
+	xsk->ifindex = if_nametoindex(ifname);
+	if (!xsk->ifindex) {
+		err = -errno;
+		goto out_socket;
+	}
+	/* strncpy() does not terminate on truncation, so do it explicitly. */
+	strncpy(xsk->ifname, ifname, IFNAMSIZ - 1);
+	xsk->ifname[IFNAMSIZ - 1] = '\0';
+
+	xsk_set_xdp_socket_config(&xsk->config, usr_config);
+
+	if (rx) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+				 &xsk->config.rx_size,
+				 sizeof(xsk->config.rx_size));
+		if (err) {
+			err = -errno;
+			goto out_socket;
+		}
+	}
+	if (tx) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+				 &xsk->config.tx_size,
+				 sizeof(xsk->config.tx_size));
+		if (err) {
+			err = -errno;
+			goto out_socket;
+		}
+	}
+
+	optlen = sizeof(off);
+	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	if (rx) {
+		rx_map = xsk_mmap(NULL, off.rx.desc +
+				  xsk->config.rx_size * sizeof(struct xdp_desc),
+				  PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_POPULATE,
+				  xsk->fd, XDP_PGOFF_RX_RING);
+		if (rx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_socket;
+		}
+
+		rx->mask = xsk->config.rx_size - 1;
+		rx->size = xsk->config.rx_size;
+		rx->producer = rx_map + off.rx.producer;
+		rx->consumer = rx_map + off.rx.consumer;
+		rx->ring = rx_map + off.rx.desc;
+	}
+	xsk->rx = rx;
+
+	if (tx) {
+		tx_map = xsk_mmap(NULL, off.tx.desc +
+				  xsk->config.tx_size * sizeof(struct xdp_desc),
+				  PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_POPULATE,
+				  xsk->fd, XDP_PGOFF_TX_RING);
+		if (tx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_mmap_rx;
+		}
+
+		tx->mask = xsk->config.tx_size - 1;
+		tx->size = xsk->config.tx_size;
+		tx->producer = tx_map + off.tx.producer;
+		tx->consumer = tx_map + off.tx.consumer;
+		tx->ring = tx_map + off.tx.desc;
+		/* Producer rings keep cached_cons one ring size ahead. */
+		tx->cached_cons = xsk->config.tx_size;
+	}
+	xsk->tx = tx;
+
+	sxdp.sxdp_family = PF_XDP;
+	sxdp.sxdp_ifindex = xsk->ifindex;
+	sxdp.sxdp_queue_id = xsk->queue_id;
+	sxdp.sxdp_flags = xsk->config.bind_flags;
+
+	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+	if (err) {
+		err = -errno;
+		goto out_mmap_tx;
+	}
+
+	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+		err = xsk_setup_xdp_prog(xsk);
+		if (err)
+			goto out_mmap_tx;
+	}
+
+	*xsk_ptr = xsk;
+	return 0;
+
+	/* Unmap the ring mappings themselves, not the caller's ring structs. */
+out_mmap_tx:
+	if (tx)
+		munmap(tx_map,
+		       off.tx.desc +
+		       xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+	if (rx)
+		munmap(rx_map,
+		       off.rx.desc +
+		       xsk->config.rx_size * sizeof(struct xdp_desc));
+out_socket:
+	/* Only close fds opened here; the umem keeps its own fd. */
+	if (--umem->refcount)
+		close(xsk->fd);
+out_xsk_alloc:
+	free(xsk);
+	return err;
+}
+
+/* Destroy @umem: unmap its fill and completion rings, close its socket
+ * and free the object. A NULL @umem is accepted and ignored.
+ *
+ * Returns 0 on success or -EBUSY if sockets still reference the umem. If
+ * the ring offsets cannot be queried the rings are left mapped, but the
+ * socket is still closed and the object freed.
+ */
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+	struct xdp_mmap_offsets off;
+	socklen_t optlen;
+	int err;
+
+	if (!umem)
+		return 0;
+
+	if (umem->refcount)
+		return -EBUSY;
+
+	optlen = sizeof(off);
+	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	if (!err) {
+		/* ->ring points off.*.desc bytes into the mapping, while
+		 * munmap() needs the start address returned by mmap.
+		 */
+		munmap(umem->fill->ring - off.fr.desc,
+		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+		munmap(umem->comp->ring - off.cr.desc,
+		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+	}
+
+	close(umem->fd);
+	free(umem);
+
+	return 0;
+}
+
+/* Destroy @xsk: clear its entries in the XDP program's maps (best effort),
+ * unmap its RX and TX rings, drop its reference on the umem, close its fd
+ * unless that fd belongs to the umem, and free the object. A NULL @xsk is
+ * accepted and ignored.
+ */
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+	struct xdp_mmap_offsets off;
+	socklen_t optlen;
+	int err;
+
+	if (!xsk)
+		return;
+
+	(void)xsk_update_bpf_maps(xsk, 0, 0);
+
+	optlen = sizeof(off);
+	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
+	if (!err) {
+		/* ->ring points off.*.desc bytes into the mapping, while
+		 * munmap() needs the start address returned by mmap.
+		 */
+		if (xsk->rx)
+			munmap(xsk->rx->ring - off.rx.desc,
+			       off.rx.desc +
+			       xsk->config.rx_size * sizeof(struct xdp_desc));
+		if (xsk->tx)
+			munmap(xsk->tx->ring - off.tx.desc,
+			       off.tx.desc +
+			       xsk->config.tx_size * sizeof(struct xdp_desc));
+	}
+
+	xsk->umem->refcount--;
+	/* Do not close an fd that also has an associated umem connected
+	 * to it.
+	 */
+	if (xsk->fd != xsk->umem->fd)
+		close(xsk->fd);
+	free(xsk);
+}
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
new file mode 100644
index 000000000000..a497f00e2962
--- /dev/null
+++ b/tools/lib/bpf/xsk.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __LIBBPF_XSK_H
+#define __LIBBPF_XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/if_xdp.h>
+
+#include "libbpf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Do not access these members directly. Use the functions below.
+ *
+ * DEFINE_XSK_RING(name) declares "struct name"; producer and consumer
+ * rings share the same layout:
+ *   cached_prod, cached_cons - local copies of the ring indices, refreshed
+ *                              from *producer / *consumer only when the
+ *                              cached values are not sufficient
+ *   mask               - ANDed with an index to select a slot in ring
+ *                        (presumably size - 1; set up outside this header)
+ *   size               - added to *consumer when a producer ring refreshes
+ *                        cached_cons (presumably the number of entries)
+ *   producer, consumer - pointers to the ring's index words
+ *   ring               - descriptor array, accessed as __u64 addresses or
+ *                        struct xdp_desc by the helpers below
+ */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+ __u32 cached_prod; \
+ __u32 cached_cons; \
+ __u32 mask; \
+ __u32 size; \
+ __u32 *producer; \
+ __u32 *consumer; \
+ void *ring; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+/* Opaque handles: this header only declares them and passes pointers to
+ * them through the API below.
+ */
+struct xsk_umem;
+struct xsk_socket;
+
+/* Return a pointer to the __u64 slot of the fill ring that idx maps to.
+ * idx is wrapped with the ring mask, so it may be a free-running index.
+ */
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+					      __u32 idx)
+{
+	__u32 slot = idx & fill->mask;
+
+	return (__u64 *)fill->ring + slot;
+}
+
+/* Return a read-only pointer to the __u64 slot of the completion ring that
+ * idx maps to. idx is wrapped with the ring mask.
+ */
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+	__u32 slot = idx & comp->mask;
+
+	return (const __u64 *)comp->ring + slot;
+}
+
+/* Return a pointer to the descriptor of the tx ring that idx maps to.
+ * idx is wrapped with the ring mask.
+ */
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+						      __u32 idx)
+{
+	__u32 slot = idx & tx->mask;
+
+	return (struct xdp_desc *)tx->ring + slot;
+}
+
+/* Return a read-only pointer to the descriptor of the rx ring that idx
+ * maps to. idx is wrapped with the ring mask.
+ */
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+	__u32 slot = idx & rx->mask;
+
+	return (const struct xdp_desc *)rx->ring + slot;
+}
+
+/* Return the number of free entries in producer ring r.
+ *
+ * If the cached indices already show at least nb free entries, that cached
+ * count is returned without touching the shared consumer index. Otherwise
+ * the cache is refreshed first and the updated count is returned, which may
+ * still be smaller than nb.
+ */
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+	__u32 room = r->cached_cons - r->cached_prod;
+
+	if (room < nb) {
+		/* Refresh the local copy of the consumer index.
+		 * cached_cons is kept r->size ahead of the real consumer
+		 * index so that the common-case computation above is a single
+		 * subtraction. Without this trick it would have to be
+		 * r->size - (r->cached_prod - r->cached_cons).
+		 */
+		r->cached_cons = *r->consumer + r->size;
+		room = r->cached_cons - r->cached_prod;
+	}
+
+	return room;
+}
+
+/* Return how many entries can be consumed from ring r, capped at nb.
+ *
+ * The shared producer index is only re-read when the cached indices show
+ * an empty ring.
+ */
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+	__u32 pending = r->cached_prod - r->cached_cons;
+
+	if (!pending) {
+		r->cached_prod = *r->producer;
+		pending = r->cached_prod - r->cached_cons;
+	}
+
+	if (pending > nb)
+		return nb;
+
+	return pending;
+}
+
+/* Reserve nb consecutive entries in producer ring prod.
+ *
+ * On success returns nb and stores the index of the first reserved entry
+ * in *idx; following entries are idx + 1, idx + 2, ... (the slot accessors
+ * wrap the index). Returns 0 and leaves *idx untouched when fewer than nb
+ * entries are free. Reservation is all-or-nothing.
+ *
+ * No branch-prediction macro is used here so that this header does not
+ * depend on kernel-internal helpers.
+ */
+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
+					    size_t nb, __u32 *idx)
+{
+	if (xsk_prod_nb_free(prod, nb) < nb)
+		return 0;
+
+	*idx = prod->cached_prod;
+	prod->cached_prod += nb;
+
+	return nb;
+}
+
+/* Publish nb previously reserved and filled-in entries by advancing the
+ * ring's producer index.
+ */
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
+{
+	/* Release store: make sure everything has been written to the ring
+	 * before the new producer index becomes visible to the kernel. A
+	 * compiler builtin is used so that this header does not depend on
+	 * kernel-internal barrier macros.
+	 */
+	__atomic_store_n(prod->producer, (__u32)(*prod->producer + nb),
+			 __ATOMIC_RELEASE);
+}
+
+/* Claim up to nb entries from consumer ring cons.
+ *
+ * Returns the number of entries claimed (0..nb). When it is non-zero,
+ * *idx receives the index of the first claimed entry; when it is zero,
+ * *idx is left untouched. Claimed entries are handed back with
+ * xsk_ring_cons__release().
+ */
+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
+					 size_t nb, __u32 *idx)
+{
+	size_t entries = xsk_cons_nb_avail(cons, nb);
+
+	if (entries > 0) {
+		/* Make sure we do not speculatively read the data before
+		 * we have received the packet buffers from the ring. A
+		 * compiler builtin is used so that this header does not
+		 * depend on kernel-internal barrier macros.
+		 */
+		__atomic_thread_fence(__ATOMIC_ACQUIRE);
+
+		*idx = cons->cached_cons;
+		cons->cached_cons += entries;
+	}
+
+	return entries;
+}
+
+/* Hand nb consumed entries back by advancing the ring's consumer index. */
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
+{
+	/* Release store: make sure the entries have been read before the
+	 * new consumer index becomes visible, since the other side may
+	 * overwrite the released slots as soon as it sees the update.
+	 */
+	__atomic_store_n(cons->consumer, (__u32)(*cons->consumer + nb),
+			 __ATOMIC_RELEASE);
+}
+
+/* Return the address addr bytes into the memory area starting at
+ * umem_area. No bounds checking is done.
+ */
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+	char *base = (char *)umem_area;
+
+	return base + addr;
+}
+
+/* File descriptor accessors for the opaque handles.
+ * NOTE(review): the implementations are not part of this header;
+ * presumably they return the fd held by the umem / socket - confirm in
+ * xsk.c, including the result for a NULL argument.
+ */
+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
+
+/* Default ring and frame dimensions.
+ * NOTE(review): presumably the values applied when a NULL config is passed
+ * to the create functions - confirm in xsk.c.
+ */
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+
+/* Umem configuration accepted by xsk_umem__create(). */
+struct xsk_umem_config {
+ /* Number of __u64 entries in the fill ring. */
+ __u32 fill_size;
+ /* Number of __u64 entries in the completion ring. */
+ __u32 comp_size;
+ /* NOTE(review): presumably the size of one frame in bytes, cf.
+  * XSK_UMEM__DEFAULT_FRAME_SIZE - confirm.
+  */
+ __u32 frame_size;
+ /* NOTE(review): presumably the headroom per frame in bytes, cf.
+  * XSK_UMEM__DEFAULT_FRAME_HEADROOM - confirm.
+  */
+ __u32 frame_headroom;
+};
+
+/* Flags for the libbpf_flags field. */
+/* When set, xsk_socket__create() skips its XDP program setup step. */
+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
+
+/* Socket configuration accepted by xsk_socket__create(). */
+struct xsk_socket_config {
+ /* Number of struct xdp_desc entries in the rx ring. */
+ __u32 rx_size;
+ /* Number of struct xdp_desc entries in the tx ring. */
+ __u32 tx_size;
+ /* Bitmask of XSK_LIBBPF_FLAGS__* values. */
+ __u32 libbpf_flags;
+ /* NOTE(review): presumably the XDP_FLAGS_* used when attaching the XDP
+  * program - confirm in xsk.c.
+  */
+ __u32 xdp_flags;
+ /* Copied to sxdp_flags of the sockaddr_xdp passed to bind(). */
+ __u16 bind_flags;
+};
+
+/* Set config to NULL to get the default configuration.
+ *
+ * xsk_socket__create() returns 0 and stores the new socket handle in *xsk
+ * on success; on failure it returns a negative errno value and releases
+ * what it had set up.
+ * NOTE(review): xsk_umem__create() presumably follows the same return
+ * convention and fills in the caller-provided fill/comp rings, as
+ * xsk_socket__create() presumably does for rx/tx - confirm in xsk.c.
+ */
+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
+ const char *ifname, __u32 queue_id,
+ struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use.
+ * A NULL umem is accepted and returns 0.
+ */
+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
+/* A NULL xsk is accepted and is a no-op. Drops the socket's reference on
+ * its umem, so sockets must be deleted before xsk_umem__delete() can
+ * succeed.
+ */
+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_XSK_H */