aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/net/bpf_jit_comp.c264
-rw-r--r--include/linux/bpf.h60
-rw-r--r--include/linux/bpf_verifier.h3
-rw-r--r--include/linux/filter.h10
-rw-r--r--kernel/bpf/arraymap.c215
-rw-r--r--kernel/bpf/core.c73
-rw-r--r--kernel/bpf/map_in_map.c5
-rw-r--r--kernel/bpf/syscall.c56
-rw-r--r--kernel/bpf/verifier.c120
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c487
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall1.c48
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c31
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall4.c33
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall5.c40
15 files changed, 1364 insertions, 140 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 2e586f579945..15615c94804f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -203,8 +203,9 @@ struct jit_context {
/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-/* number of bytes emit_call() needs to generate call instruction */
-#define X86_CALL_SIZE 5
+
+/* Number of bytes emit_patch() needs to generate instructions */
+#define X86_PATCH_SIZE 5
#define PROLOGUE_SIZE 25
@@ -215,7 +216,7 @@ struct jit_context {
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
- int cnt = X86_CALL_SIZE;
+ int cnt = X86_PATCH_SIZE;
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
@@ -238,6 +239,123 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*pprog = prog;
}
+static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + X86_PATCH_SIZE);
+ if (!is_simm32(offset)) {
+ pr_err("Target call %p is out of range\n", func);
+ return -ERANGE;
+ }
+ EMIT1_off32(opcode, offset);
+ *pprog = prog;
+ return 0;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE8);
+}
+
+static int emit_jump(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE9);
+}
+
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr,
+ const bool text_live)
+{
+ int (*emit_patch_fn)(u8 **pprog, void *func, void *ip);
+ const u8 *nop_insn = ideal_nops[NOP_ATOMIC5];
+ u8 old_insn[X86_PATCH_SIZE] = {};
+ u8 new_insn[X86_PATCH_SIZE] = {};
+ u8 *prog;
+ int ret;
+
+ switch (t) {
+ case BPF_MOD_NOP_TO_CALL ... BPF_MOD_CALL_TO_NOP:
+ emit_patch_fn = emit_call;
+ break;
+ case BPF_MOD_NOP_TO_JUMP ... BPF_MOD_JUMP_TO_NOP:
+ emit_patch_fn = emit_jump;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ switch (t) {
+ case BPF_MOD_NOP_TO_CALL:
+ case BPF_MOD_NOP_TO_JUMP:
+ if (!old_addr && new_addr) {
+ memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
+
+ prog = new_insn;
+ ret = emit_patch_fn(&prog, new_addr, ip);
+ if (ret)
+ return ret;
+ break;
+ }
+ return -ENXIO;
+ case BPF_MOD_CALL_TO_CALL:
+ case BPF_MOD_JUMP_TO_JUMP:
+ if (old_addr && new_addr) {
+ prog = old_insn;
+ ret = emit_patch_fn(&prog, old_addr, ip);
+ if (ret)
+ return ret;
+
+ prog = new_insn;
+ ret = emit_patch_fn(&prog, new_addr, ip);
+ if (ret)
+ return ret;
+ break;
+ }
+ return -ENXIO;
+ case BPF_MOD_CALL_TO_NOP:
+ case BPF_MOD_JUMP_TO_NOP:
+ if (old_addr && !new_addr) {
+ memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
+
+ prog = old_insn;
+ ret = emit_patch_fn(&prog, old_addr, ip);
+ if (ret)
+ return ret;
+ break;
+ }
+ return -ENXIO;
+ default:
+ return -ENOTSUPP;
+ }
+
+ ret = -EBUSY;
+ mutex_lock(&text_mutex);
+ if (memcmp(ip, old_insn, X86_PATCH_SIZE))
+ goto out;
+ if (text_live)
+ text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+ else
+ memcpy(ip, new_insn, X86_PATCH_SIZE);
+ ret = 0;
+out:
+ mutex_unlock(&text_mutex);
+ return ret;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ if (!is_kernel_text((long)ip) &&
+ !is_bpf_text_address((long)ip))
+ /* BPF poking in modules is not supported */
+ return -EINVAL;
+
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+}
+
/*
* Generate the following code:
*
@@ -252,7 +370,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog)
{
u8 *prog = *pprog;
int label1, label2, label3;
@@ -319,6 +437,69 @@ static void emit_bpf_tail_call(u8 **pprog)
*pprog = prog;
}
+static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+ u8 **pprog, int addr, u8 *image)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+ EMIT2(X86_JA, 14); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+
+ poke->ip = image + (addr - X86_PATCH_SIZE);
+ poke->adj_off = PROLOGUE_SIZE;
+
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+ prog += X86_PATCH_SIZE;
+ /* out: */
+
+ *pprog = prog;
+}
+
+static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
+{
+ static const enum bpf_text_poke_type type = BPF_MOD_NOP_TO_JUMP;
+ struct bpf_jit_poke_descriptor *poke;
+ struct bpf_array *array;
+ struct bpf_prog *target;
+ int i, ret;
+
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ poke = &prog->aux->poke_tab[i];
+ WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+
+ if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
+ continue;
+
+ array = container_of(poke->tail_call.map, struct bpf_array, map);
+ mutex_lock(&array->aux->poke_mutex);
+ target = array->ptrs[poke->tail_call.key];
+ if (target) {
+ /* Plain memcpy is used when image is not live yet
+ * and still not locked as read-only. Once poke
+ * location is active (poke->ip_stable), any parallel
+ * bpf_arch_text_poke() might occur still on the
+ * read-write image until we finally locked it as
+ * read-only. Both modifications on the given image
+ * are under text_mutex to avoid interference.
+ */
+ ret = __bpf_arch_text_poke(poke->ip, type, NULL,
+ (u8 *)target->bpf_func +
+ poke->adj_off, false);
+ BUG_ON(ret < 0);
+ }
+ WRITE_ONCE(poke->ip_stable, true);
+ mutex_unlock(&array->aux->poke_mutex);
+ }
+}
+
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u32 dst_reg, const u32 imm32)
{
@@ -480,72 +661,6 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
*pprog = prog;
}
-static int emit_call(u8 **pprog, void *func, void *ip)
-{
- u8 *prog = *pprog;
- int cnt = 0;
- s64 offset;
-
- offset = func - (ip + X86_CALL_SIZE);
- if (!is_simm32(offset)) {
- pr_err("Target call %p is out of range\n", func);
- return -EINVAL;
- }
- EMIT1_off32(0xE8, offset);
- *pprog = prog;
- return 0;
-}
-
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *old_addr, void *new_addr)
-{
- u8 old_insn[X86_CALL_SIZE] = {};
- u8 new_insn[X86_CALL_SIZE] = {};
- u8 *prog;
- int ret;
-
- if (!is_kernel_text((long)ip) &&
- !is_bpf_text_address((long)ip))
- /* BPF trampoline in modules is not supported */
- return -EINVAL;
-
- if (old_addr) {
- prog = old_insn;
- ret = emit_call(&prog, old_addr, (void *)ip);
- if (ret)
- return ret;
- }
- if (new_addr) {
- prog = new_insn;
- ret = emit_call(&prog, new_addr, (void *)ip);
- if (ret)
- return ret;
- }
- ret = -EBUSY;
- mutex_lock(&text_mutex);
- switch (t) {
- case BPF_MOD_NOP_TO_CALL:
- if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE))
- goto out;
- text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
- break;
- case BPF_MOD_CALL_TO_CALL:
- if (memcmp(ip, old_insn, X86_CALL_SIZE))
- goto out;
- text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
- break;
- case BPF_MOD_CALL_TO_NOP:
- if (memcmp(ip, old_insn, X86_CALL_SIZE))
- goto out;
- text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE, NULL);
- break;
- }
- ret = 0;
-out:
- mutex_unlock(&text_mutex);
- return ret;
-}
-
static bool ex_handler_bpf(const struct exception_table_entry *x,
struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr)
@@ -1013,7 +1128,11 @@ xadd: if (is_imm8(insn->off))
break;
case BPF_JMP | BPF_TAIL_CALL:
- emit_bpf_tail_call(&prog);
+ if (imm32)
+ emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+ &prog, addrs[i], image);
+ else
+ emit_bpf_tail_call_indirect(&prog);
break;
/* cond jump */
@@ -1394,7 +1513,7 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
*/
- orig_call += X86_CALL_SIZE;
+ orig_call += X86_PATCH_SIZE;
prog = image;
@@ -1571,6 +1690,7 @@ out_image:
if (image) {
if (!prog->is_func || extra_pass) {
+ bpf_tail_call_direct_fixup(prog);
bpf_jit_binary_lock_ro(header);
} else {
jit_data->addrs = addrs;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e89e86122233..c2f07fd410c1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -22,6 +22,7 @@ struct bpf_verifier_env;
struct bpf_verifier_log;
struct perf_event;
struct bpf_prog;
+struct bpf_prog_aux;
struct bpf_map;
struct sock;
struct seq_file;
@@ -64,6 +65,12 @@ struct bpf_map_ops {
const struct btf_type *key_type,
const struct btf_type *value_type);
+ /* Prog poke tracking helpers. */
+ int (*map_poke_track)(struct bpf_map *map, struct bpf_prog_aux *aux);
+ void (*map_poke_untrack)(struct bpf_map *map, struct bpf_prog_aux *aux);
+ void (*map_poke_run)(struct bpf_map *map, u32 key, struct bpf_prog *old,
+ struct bpf_prog *new);
+
/* Direct value access helpers. */
int (*map_direct_value_addr)(const struct bpf_map *map,
u64 *imm, u32 off);
@@ -488,6 +495,24 @@ struct bpf_func_info_aux {
bool unreliable;
};
+enum bpf_jit_poke_reason {
+ BPF_POKE_REASON_TAIL_CALL,
+};
+
+/* Descriptor of pokes pointing /into/ the JITed image. */
+struct bpf_jit_poke_descriptor {
+ void *ip;
+ union {
+ struct {
+ struct bpf_map *map;
+ u32 key;
+ } tail_call;
+ };
+ bool ip_stable;
+ u8 adj_off;
+ u16 reason;
+};
+
struct bpf_prog_aux {
atomic64_t refcnt;
u32 used_map_cnt;
@@ -513,6 +538,8 @@ struct bpf_prog_aux {
const char *attach_func_name;
struct bpf_prog **func;
void *jit_data; /* JIT specific data. arch dependent */
+ struct bpf_jit_poke_descriptor *poke_tab;
+ u32 size_poke_tab;
struct latch_tree_node ksym_tnode;
struct list_head ksym_lnode;
const struct bpf_prog_ops *ops;
@@ -560,17 +587,26 @@ struct bpf_prog_aux {
};
};
+struct bpf_array_aux {
+ /* 'Ownership' of prog array is claimed by the first program that
+ * is going to use this map or by the first program which FD is
+ * stored in the map to make sure that all callers and callees have
+ * the same prog type and JITed flag.
+ */
+ enum bpf_prog_type type;
+ bool jited;
+ /* Programs with direct jumps into programs part of this array. */
+ struct list_head poke_progs;
+ struct bpf_map *map;
+ struct mutex poke_mutex;
+ struct work_struct work;
+};
+
struct bpf_array {
struct bpf_map map;
u32 elem_size;
u32 index_mask;
- /* 'ownership' of prog_array is claimed by the first program that
- * is going to use this map or by the first program which FD is stored
- * in the map to make sure that all callers and callees have the same
- * prog_type and JITed flag
- */
- enum bpf_prog_type owner_prog_type;
- bool owner_jited;
+ struct bpf_array_aux *aux;
union {
char value[0] __aligned(8);
void *ptrs[0] __aligned(8);
@@ -1031,6 +1067,10 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
{
return -ENOTSUPP;
}
+
+static inline void bpf_map_put(struct bpf_map *map)
+{
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -1284,10 +1324,16 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
#endif /* CONFIG_INET */
enum bpf_text_poke_type {
+ /* All call-related pokes. */
BPF_MOD_NOP_TO_CALL,
BPF_MOD_CALL_TO_CALL,
BPF_MOD_CALL_TO_NOP,
+ /* All jump-related pokes. */
+ BPF_MOD_NOP_TO_JUMP,
+ BPF_MOD_JUMP_TO_JUMP,
+ BPF_MOD_JUMP_TO_NOP,
};
+
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
void *addr1, void *addr2);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index cdd08bf0ec06..26e40de9ef55 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -293,7 +293,7 @@ struct bpf_verifier_state_list {
struct bpf_insn_aux_data {
union {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
- unsigned long map_state; /* pointer/poison value for maps */
+ unsigned long map_ptr_state; /* pointer/poison value for maps */
s32 call_imm; /* saved imm field of call insn */
u32 alu_limit; /* limit for add/sub register with pointer */
struct {
@@ -301,6 +301,7 @@ struct bpf_insn_aux_data {
u32 map_off; /* offset from value base address */
};
};
+ u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
int sanitize_stack_off; /* stack slot to be cleared */
bool seen; /* this insn was processed by the verifier */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index ad80e9c6111c..796b60d8cc6c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -952,6 +952,9 @@ void *bpf_jit_alloc_exec(unsigned long size);
void bpf_jit_free_exec(void *addr);
void bpf_jit_free(struct bpf_prog *fp);
+int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
+ struct bpf_jit_poke_descriptor *poke);
+
int bpf_jit_get_func_addr(const struct bpf_prog *prog,
const struct bpf_insn *insn, bool extra_pass,
u64 *func_addr, bool *func_addr_fixed);
@@ -1055,6 +1058,13 @@ static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
return false;
}
+static inline int
+bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
+ struct bpf_jit_poke_descriptor *poke)
+{
+ return -ENOTSUPP;
+}
+
static inline void bpf_jit_free(struct bpf_prog *fp)
{
bpf_prog_unlock_free(fp);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633c8c701ff6..58bdf5fd24cc 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -586,10 +586,17 @@ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
if (IS_ERR(new_ptr))
return PTR_ERR(new_ptr);
- old_ptr = xchg(array->ptrs + index, new_ptr);
+ if (map->ops->map_poke_run) {
+ mutex_lock(&array->aux->poke_mutex);
+ old_ptr = xchg(array->ptrs + index, new_ptr);
+ map->ops->map_poke_run(map, index, old_ptr, new_ptr);
+ mutex_unlock(&array->aux->poke_mutex);
+ } else {
+ old_ptr = xchg(array->ptrs + index, new_ptr);
+ }
+
if (old_ptr)
map->ops->map_fd_put_ptr(old_ptr);
-
return 0;
}
@@ -602,7 +609,15 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
if (index >= array->map.max_entries)
return -E2BIG;
- old_ptr = xchg(array->ptrs + index, NULL);
+ if (map->ops->map_poke_run) {
+ mutex_lock(&array->aux->poke_mutex);
+ old_ptr = xchg(array->ptrs + index, NULL);
+ map->ops->map_poke_run(map, index, old_ptr, NULL);
+ mutex_unlock(&array->aux->poke_mutex);
+ } else {
+ old_ptr = xchg(array->ptrs + index, NULL);
+ }
+
if (old_ptr) {
map->ops->map_fd_put_ptr(old_ptr);
return 0;
@@ -671,17 +686,205 @@ static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
+struct prog_poke_elem {
+ struct list_head list;
+ struct bpf_prog_aux *aux;
+};
+
+static int prog_array_map_poke_track(struct bpf_map *map,
+ struct bpf_prog_aux *prog_aux)
+{
+ struct prog_poke_elem *elem;
+ struct bpf_array_aux *aux;
+ int ret = 0;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ mutex_lock(&aux->poke_mutex);
+ list_for_each_entry(elem, &aux->poke_progs, list) {
+ if (elem->aux == prog_aux)
+ goto out;
+ }
+
+ elem = kmalloc(sizeof(*elem), GFP_KERNEL);
+ if (!elem) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&elem->list);
+ /* We must track the program's aux info at this point in time
+ * since the program pointer itself may not be stable yet, see
+ * also comment in prog_array_map_poke_run().
+ */
+ elem->aux = prog_aux;
+
+ list_add_tail(&elem->list, &aux->poke_progs);
+out:
+ mutex_unlock(&aux->poke_mutex);
+ return ret;
+}
+
+static void prog_array_map_poke_untrack(struct bpf_map *map,
+ struct bpf_prog_aux *prog_aux)
+{
+ struct prog_poke_elem *elem, *tmp;
+ struct bpf_array_aux *aux;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ mutex_lock(&aux->poke_mutex);
+ list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
+ if (elem->aux == prog_aux) {
+ list_del_init(&elem->list);
+ kfree(elem);
+ break;
+ }
+ }
+ mutex_unlock(&aux->poke_mutex);
+}
+
+static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
+ struct bpf_prog *old,
+ struct bpf_prog *new)
+{
+ enum bpf_text_poke_type type;
+ struct prog_poke_elem *elem;
+ struct bpf_array_aux *aux;
+
+ if (!old && new)
+ type = BPF_MOD_NOP_TO_JUMP;
+ else if (old && !new)
+ type = BPF_MOD_JUMP_TO_NOP;
+ else if (old && new)
+ type = BPF_MOD_JUMP_TO_JUMP;
+ else
+ return;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));
+
+ list_for_each_entry(elem, &aux->poke_progs, list) {
+ struct bpf_jit_poke_descriptor *poke;
+ int i, ret;
+
+ for (i = 0; i < elem->aux->size_poke_tab; i++) {
+ poke = &elem->aux->poke_tab[i];
+
+ /* Few things to be aware of:
+ *
+ * 1) We can only ever access aux in this context, but
+ * not aux->prog since it might not be stable yet and
+ * there could be danger of use after free otherwise.
+ * 2) Initially when we start tracking aux, the program
+ * is not JITed yet and also does not have a kallsyms
+ * entry. We skip these as poke->ip_stable is not
+ * active yet. The JIT will do the final fixup before
+ * setting it stable. The various poke->ip_stable are
+ * successively activated, so tail call updates can
+ * arrive from here while JIT is still finishing its
+ * final fixup for non-activated poke entries.
+ * 3) On program teardown, the program's kallsym entry gets
+ * removed out of RCU callback, but we can only untrack
+ * from sleepable context, therefore bpf_arch_text_poke()
+ * might not see that this is in BPF text section and
+ * bails out with -EINVAL. As these are unreachable since
+ * RCU grace period already passed, we simply skip them.
+ * 4) Also programs reaching refcount of zero while patching
+ * is in progress is okay since we're protected under
+ * poke_mutex and untrack the programs before the JIT
+ * buffer is freed. When we're still in the middle of
+ * patching and suddenly kallsyms entry of the program
+ * gets evicted, we just skip the rest which is fine due
+ * to point 3).
+ * 5) Any other error happening below from bpf_arch_text_poke()
+ * is a unexpected bug.
+ */
+ if (!READ_ONCE(poke->ip_stable))
+ continue;
+ if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
+ continue;
+ if (poke->tail_call.map != map ||
+ poke->tail_call.key != key)
+ continue;
+
+ ret = bpf_arch_text_poke(poke->ip, type,
+ old ? (u8 *)old->bpf_func +
+ poke->adj_off : NULL,
+ new ? (u8 *)new->bpf_func +
+ poke->adj_off : NULL);
+ BUG_ON(ret < 0 && ret != -EINVAL);
+ }
+ }
+}
+
+static void prog_array_map_clear_deferred(struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_array_aux,
+ work)->map;
+ bpf_fd_array_map_clear(map);
+ bpf_map_put(map);
+}
+
+static void prog_array_map_clear(struct bpf_map *map)
+{
+ struct bpf_array_aux *aux = container_of(map, struct bpf_array,
+ map)->aux;
+ bpf_map_inc(map);
+ schedule_work(&aux->work);
+}
+
+static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_array_aux *aux;
+ struct bpf_map *map;
+
+ aux = kzalloc(sizeof(*aux), GFP_KERNEL);
+ if (!aux)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_WORK(&aux->work, prog_array_map_clear_deferred);
+ INIT_LIST_HEAD(&aux->poke_progs);
+ mutex_init(&aux->poke_mutex);
+
+ map = array_map_alloc(attr);
+ if (IS_ERR(map)) {
+ kfree(aux);
+ return map;
+ }
+
+ container_of(map, struct bpf_array, map)->aux = aux;
+ aux->map = map;
+
+ return map;
+}
+
+static void prog_array_map_free(struct bpf_map *map)
+{
+ struct prog_poke_elem *elem, *tmp;
+ struct bpf_array_aux *aux;
+
+ aux = container_of(map, struct bpf_array, map)->aux;
+ list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
+ list_del_init(&elem->list);
+ kfree(elem);
+ }
+ kfree(aux);
+ fd_array_map_free(map);
+}
+
const struct bpf_map_ops prog_array_map_ops = {
.map_alloc_check = fd_array_map_alloc_check,
- .map_alloc = array_map_alloc,
- .map_free = fd_array_map_free,
+ .map_alloc = prog_array_map_alloc,
+ .map_free = prog_array_map_free,
+ .map_poke_track = prog_array_map_poke_track,
+ .map_poke_untrack = prog_array_map_poke_untrack,
+ .map_poke_run = prog_array_map_poke_run,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
- .map_release_uref = bpf_fd_array_map_clear,
+ .map_release_uref = prog_array_map_clear,
.map_seq_show_elem = prog_array_map_seq_show_elem,
};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b5945c3aaa8e..49e32acad7d8 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -256,6 +256,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
{
if (fp->aux) {
free_percpu(fp->aux->stats);
+ kfree(fp->aux->poke_tab);
kfree(fp->aux);
}
vfree(fp);
@@ -756,6 +757,39 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
return ret;
}
+int bpf_jit_add_poke_descriptor(struct bpf_prog *prog,
+ struct bpf_jit_poke_descriptor *poke)
+{
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
+ static const u32 poke_tab_max = 1024;
+ u32 slot = prog->aux->size_poke_tab;
+ u32 size = slot + 1;
+
+ if (size > poke_tab_max)
+ return -ENOSPC;
+ if (poke->ip || poke->ip_stable || poke->adj_off)
+ return -EINVAL;
+
+ switch (poke->reason) {
+ case BPF_POKE_REASON_TAIL_CALL:
+ if (!poke->tail_call.map)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ tab = krealloc(tab, size * sizeof(*poke), GFP_KERNEL);
+ if (!tab)
+ return -ENOMEM;
+
+ memcpy(&tab[slot], poke, sizeof(*poke));
+ prog->aux->size_poke_tab = size;
+ prog->aux->poke_tab = tab;
+
+ return slot;
+}
+
static atomic_long_t bpf_jit_current;
/* Can be overridden by an arch's JIT compiler if it has a custom,
@@ -1691,18 +1725,17 @@ bool bpf_prog_array_compatible(struct bpf_array *array,
if (fp->kprobe_override)
return false;
- if (!array->owner_prog_type) {
+ if (!array->aux->type) {
/* There's no owner yet where we could check for
* compatibility.
*/
- array->owner_prog_type = fp->type;
- array->owner_jited = fp->jited;
-
+ array->aux->type = fp->type;
+ array->aux->jited = fp->jited;
return true;
}
- return array->owner_prog_type == fp->type &&
- array->owner_jited == fp->jited;
+ return array->aux->type == fp->type &&
+ array->aux->jited == fp->jited;
}
static int bpf_check_tail_call(const struct bpf_prog *fp)
@@ -2003,12 +2036,40 @@ int bpf_prog_array_copy_info(struct bpf_prog_array *array,
: 0;
}
+static void bpf_free_cgroup_storage(struct bpf_prog_aux *aux)
+{
+ enum bpf_cgroup_storage_type stype;
+
+ for_each_cgroup_storage_type(stype) {
+ if (!aux->cgroup_storage[stype])
+ continue;
+ bpf_cgroup_storage_release(aux->prog,
+ aux->cgroup_storage[stype]);
+ }
+}
+
+static void bpf_free_used_maps(struct bpf_prog_aux *aux)
+{
+ struct bpf_map *map;
+ int i;
+
+ bpf_free_cgroup_storage(aux);
+ for (i = 0; i < aux->used_map_cnt; i++) {
+ map = aux->used_maps[i];
+ if (map->ops->map_poke_untrack)
+ map->ops->map_poke_untrack(map, aux);
+ bpf_map_put(map);
+ }
+ kfree(aux->used_maps);
+}
+
static void bpf_prog_free_deferred(struct work_struct *work)
{
struct bpf_prog_aux *aux;
int i;
aux = container_of(work, struct bpf_prog_aux, work);
+ bpf_free_used_maps(aux);
if (bpf_prog_is_dev_bound(aux))
bpf_prog_offload_destroy(aux->prog);
#ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 4cbe987be35b..5e9366b33f0f 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -17,9 +17,8 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
if (IS_ERR(inner_map))
return inner_map;
- /* prog_array->owner_prog_type and owner_jited
- * is a runtime binding. Doing static check alone
- * in the verifier is not enough.
+ /* prog_array->aux->{type,jited} is a runtime binding.
+ * Doing static check alone in the verifier is not enough.
*/
if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4ae52eb05f41..e3461ec59570 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -25,12 +25,13 @@
#include <linux/nospec.h>
#include <uapi/linux/btf.h>
-#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
- (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
- (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
- (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
+ (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+#define IS_FD_PROG_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY)
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
-#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map) || \
+ IS_FD_HASH(map))
#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
@@ -389,13 +390,12 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_map *map = filp->private_data;
const struct bpf_array *array;
- u32 owner_prog_type = 0;
- u32 owner_jited = 0;
+ u32 type = 0, jited = 0;
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
array = container_of(map, struct bpf_array, map);
- owner_prog_type = array->owner_prog_type;
- owner_jited = array->owner_jited;
+ type = array->aux->type;
+ jited = array->aux->jited;
}
seq_printf(m,
@@ -415,12 +415,9 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->memory.pages * 1ULL << PAGE_SHIFT,
map->id,
READ_ONCE(map->frozen));
-
- if (owner_prog_type) {
- seq_printf(m, "owner_prog_type:\t%u\n",
- owner_prog_type);
- seq_printf(m, "owner_jited:\t%u\n",
- owner_jited);
+ if (type) {
+ seq_printf(m, "owner_prog_type:\t%u\n", type);
+ seq_printf(m, "owner_jited:\t%u\n", jited);
}
}
#endif
@@ -881,7 +878,7 @@ static int map_lookup_elem(union bpf_attr *attr)
err = bpf_percpu_cgroup_storage_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
err = bpf_stackmap_copy(map, key, value);
- } else if (IS_FD_ARRAY(map)) {
+ } else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
err = bpf_fd_array_map_lookup_elem(map, key, value);
} else if (IS_FD_HASH(map)) {
err = bpf_fd_htab_map_lookup_elem(map, key, value);
@@ -1008,6 +1005,10 @@ static int map_update_elem(union bpf_attr *attr)
map->map_type == BPF_MAP_TYPE_SOCKMAP) {
err = map->ops->map_update_elem(map, key, value, attr->flags);
goto out;
+ } else if (IS_FD_PROG_ARRAY(map)) {
+ err = bpf_fd_array_map_update_elem(map, f.file, key, value,
+ attr->flags);
+ goto out;
}
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
@@ -1090,6 +1091,9 @@ static int map_delete_elem(union bpf_attr *attr)
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_delete_elem(map, key);
goto out;
+ } else if (IS_FD_PROG_ARRAY(map)) {
+ err = map->ops->map_delete_elem(map, key);
+ goto out;
}
preempt_disable();
@@ -1302,25 +1306,6 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
return 0;
}
-/* drop refcnt on maps used by eBPF program and free auxilary data */
-static void free_used_maps(struct bpf_prog_aux *aux)
-{
- enum bpf_cgroup_storage_type stype;
- int i;
-
- for_each_cgroup_storage_type(stype) {
- if (!aux->cgroup_storage[stype])
- continue;
- bpf_cgroup_storage_release(aux->prog,
- aux->cgroup_storage[stype]);
- }
-
- for (i = 0; i < aux->used_map_cnt; i++)
- bpf_map_put(aux->used_maps[i]);
-
- kfree(aux->used_maps);
-}
-
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -1415,7 +1400,6 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
kvfree(aux->func_info);
kfree(aux->func_info_aux);
- free_used_maps(aux);
bpf_prog_uncharge_memlock(aux->prog);
security_bpf_prog_free(aux);
bpf_prog_free(aux->prog);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fc85714428c7..a0482e1c4a77 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -171,6 +171,9 @@ struct bpf_verifier_stack_elem {
#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64
+#define BPF_MAP_KEY_POISON (1ULL << 63)
+#define BPF_MAP_KEY_SEEN (1ULL << 62)
+
#define BPF_MAP_PTR_UNPRIV 1UL
#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
POISON_POINTER_DELTA))
@@ -178,12 +181,12 @@ struct bpf_verifier_stack_elem {
static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
- return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
+ return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}
static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
- return aux->map_state & BPF_MAP_PTR_UNPRIV;
+ return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}
static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
@@ -191,8 +194,31 @@ static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
{
BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
unpriv |= bpf_map_ptr_unpriv(aux);
- aux->map_state = (unsigned long)map |
- (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+ aux->map_ptr_state = (unsigned long)map |
+ (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
+}
+
+static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & BPF_MAP_KEY_POISON;
+}
+
+static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
+{
+ return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
+}
+
+static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
+{
+ return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
+}
+
+static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
+{
+ bool poisoned = bpf_map_key_poisoned(aux);
+
+ aux->map_key_state = state | BPF_MAP_KEY_SEEN |
+ (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
struct bpf_call_arg_meta {
@@ -4090,15 +4116,49 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return -EACCES;
}
- if (!BPF_MAP_PTR(aux->map_state))
+ if (!BPF_MAP_PTR(aux->map_ptr_state))
bpf_map_ptr_store(aux, meta->map_ptr,
meta->map_ptr->unpriv_array);
- else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
+ else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
meta->map_ptr->unpriv_array);
return 0;
}
+static int
+record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
+ int func_id, int insn_idx)
+{
+ struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
+ struct bpf_reg_state *regs = cur_regs(env), *reg;
+ struct bpf_map *map = meta->map_ptr;
+ struct tnum range;
+ u64 val;
+
+ if (func_id != BPF_FUNC_tail_call)
+ return 0;
+ if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
+ verbose(env, "kernel subsystem misconfigured verifier\n");
+ return -EINVAL;
+ }
+
+ range = tnum_range(0, map->max_entries - 1);
+ reg = &regs[BPF_REG_3];
+
+ if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+ }
+
+ val = reg->var_off.value;
+ if (bpf_map_key_unseen(aux))
+ bpf_map_key_store(aux, val);
+ else if (!bpf_map_key_poisoned(aux) &&
+ bpf_map_key_immediate(aux) != val)
+ bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
+ return 0;
+}
+
static int check_reference_leak(struct bpf_verifier_env *env)
{
struct bpf_func_state *state = cur_func(env);
@@ -4173,6 +4233,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
if (err)
return err;
+ err = record_func_key(env, &meta, func_id, insn_idx);
+ if (err)
+ return err;
+
/* Mark slots with STACK_MISC in case of raw mode, stack offset
* is inferred from register state.
*/
@@ -9065,6 +9129,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
static int fixup_bpf_calls(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
+ bool expect_blinding = bpf_jit_blinding_enabled(prog);
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
const int insn_cnt = prog->len;
@@ -9073,7 +9138,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
struct bpf_map *map_ptr;
- int i, cnt, delta = 0;
+ int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
@@ -9217,6 +9282,26 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
+ if (prog->jit_requested && !expect_blinding &&
+ !bpf_map_key_poisoned(aux) &&
+ !bpf_map_ptr_poisoned(aux) &&
+ !bpf_map_ptr_unpriv(aux)) {
+ struct bpf_jit_poke_descriptor desc = {
+ .reason = BPF_POKE_REASON_TAIL_CALL,
+ .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
+ .tail_call.key = bpf_map_key_immediate(aux),
+ };
+
+ ret = bpf_jit_add_poke_descriptor(prog, &desc);
+ if (ret < 0) {
+ verbose(env, "adding tail call poke descriptor failed\n");
+ return ret;
+ }
+
+ insn->imm = ret + 1;
+ continue;
+ }
+
if (!bpf_map_ptr_unpriv(aux))
continue;
@@ -9231,7 +9316,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
return -EINVAL;
}
- map_ptr = BPF_MAP_PTR(aux->map_state);
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
map_ptr->max_entries, 2);
insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
@@ -9265,7 +9350,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
- map_ptr = BPF_MAP_PTR(aux->map_state);
+ map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
ops = map_ptr->ops;
if (insn->imm == BPF_FUNC_map_lookup_elem &&
ops->map_gen_lookup) {
@@ -9345,6 +9430,23 @@ patch_call_imm:
insn->imm = fn->func - __bpf_call_base;
}
+ /* Since poke tab is now finalized, publish aux to tracker. */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+ if (!map_ptr->ops->map_poke_track ||
+ !map_ptr->ops->map_poke_untrack ||
+ !map_ptr->ops->map_poke_run) {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
+
+ ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
+ if (ret < 0) {
+ verbose(env, "tracking tail call prog failed\n");
+ return ret;
+ }
+ }
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
new file mode 100644
index 000000000000..bb8fe646dd9f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+/* test_tailcall_1 checks basic functionality by patching multiple locations
+ * in a single program for a single tail call slot with nop->jmp, jmp->nop
+ * and jmp->jmp rewrites. Also checks for nop->nop.
+ */
+static void test_tailcall_1(void)
+{
+ int err, map_fd, prog_fd, main_fd, i, j;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != i, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", j);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ j = bpf_map__def(prog_array)->max_entries - 1 - i;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != j, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err >= 0 || errno != ENOENT))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_2 checks that patching multiple programs for a single
+ * tail call slot works. It also jumps through several programs and tests
+ * the tail call limit counter.
+ */
+static void test_tailcall_2(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char prog_name[32];
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 2;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_3 checks that the count value of the tail call limit
+ * enforcement matches with expectations.
+ */
+static void test_tailcall_3(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ char buff[128] = {};
+
+ err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val);
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_4 checks that the kernel properly selects indirect jump
+ * for the case where the key is not known. Latter is passed via global
+ * data to select different targets we can compare return value of.
+ */
+static void test_tailcall_4(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ static const int zero = 0;
+ char buff[128] = {};
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != i, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_5 probes similarly to test_tailcall_4 that the kernel generates
+ * an indirect jump when the keys are const but different from different branches.
+ */
+static void test_tailcall_5(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, key[] = { 1111, 1234, 5678 };
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 retval, duration;
+ static const int zero = 0;
+ char buff[128] = {};
+ char prog_name[32];
+
+ err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_title(obj, "classifier");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ return;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(map_fd < 0))
+ return;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != i, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != 3, "tailcall",
+ "err %d errno %d retval %d\n", err, errno, retval);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+void test_tailcalls(void)
+{
+ if (test__start_subtest("tailcall_1"))
+ test_tailcall_1();
+ if (test__start_subtest("tailcall_2"))
+ test_tailcall_2();
+ if (test__start_subtest("tailcall_3"))
+ test_tailcall_3();
+ if (test__start_subtest("tailcall_4"))
+ test_tailcall_4();
+ if (test__start_subtest("tailcall_5"))
+ test_tailcall_5();
+}
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
new file mode 100644
index 000000000000..63531e1a9fa4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+TAIL_FUNC(2)
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ /* Multiple locations to make sure we patch
+ * all of them.
+ */
+ bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call(skb, &jmp_table, 0);
+
+ bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call(skb, &jmp_table, 1);
+
+ bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call(skb, &jmp_table, 2);
+
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
new file mode 100644
index 000000000000..21c85c477210
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 5);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 1);
+ return 0;
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 2);
+ return 1;
+}
+
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
+{
+ return 2;
+}
+
+SEC("classifier/3")
+int bpf_func_3(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 4);
+ return 3;
+}
+
+SEC("classifier/4")
+int bpf_func_4(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 3);
+ return 4;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 0);
+ /* Check multi-prog update. */
+ bpf_tail_call(skb, &jmp_table, 2);
+ /* Check tail call limit. */
+ bpf_tail_call(skb, &jmp_table, 3);
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
new file mode 100644
index 000000000000..1ecae198b8c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int count;
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+ count++;
+ bpf_tail_call(skb, &jmp_table, 0);
+ return 1;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, 0);
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
new file mode 100644
index 000000000000..499388758119
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int selector;
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+TAIL_FUNC(2)
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call(skb, &jmp_table, selector);
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
new file mode 100644
index 000000000000..49c64eb53f19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int selector;
+
+#define TAIL_FUNC(x) \
+ SEC("classifier/" #x) \
+ int bpf_func_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+TAIL_FUNC(2)
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+ int idx = 0;
+
+ if (selector == 1234)
+ idx = 1;
+ else if (selector == 5678)
+ idx = 2;
+
+ bpf_tail_call(skb, &jmp_table, idx);
+ return 3;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;