From a83586a7ddba25065ec37323c05deb9019ce4fa9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 23 Feb 2021 21:14:57 +0800 Subject: bpf: Remove blank line in bpf helper description comment Commit 34b2021cc616 ("bpf: Add BPF-helper for MTU checking") added an extra blank line in bpf helper description. This will make bpf_helpers_doc.py stop building bpf_helper_defs.h immediately after bpf_check_mtu(), which will affect future added functions. Fixes: 34b2021cc616 ("bpf: Add BPF-helper for MTU checking") Signed-off-by: Hangbin Liu Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20210223131457.1378978-1-liuhangbin@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4c24daa43bac..79c893310492 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3850,7 +3850,6 @@ union bpf_attr { * * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) * Description - * Check ctx packet size against exceeding MTU of net device (based * on *ifindex*). This helper will likely be used in combination * with helpers that adjust/change the packet size. -- cgit v1.2.3-59-g8ed1b From 69c087ba6225b574afb6e505b72cb75242a3d844 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 26 Feb 2021 12:49:25 -0800 Subject: bpf: Add bpf_for_each_map_elem() helper The bpf_for_each_map_elem() helper is introduced which iterates all map elements with a callback function. The helper signature looks like long bpf_for_each_map_elem(map, callback_fn, callback_ctx, flags) and for each map element, the callback_fn will be called. For example, like hashmap, the callback signature may look like long callback_fn(map, key, val, callback_ctx) There are two known use cases for this. One is from upstream ([1]) where a for_each_map_elem helper may help implement a timeout mechanism in a more generic way. Another is from our internal discussion for a firewall use case where a map contains all the rules. The packet data can be compared to all these rules to decide allow or deny the packet. For array maps, users can already use a bounded loop to traverse elements. Using this helper can avoid using bounded loop. For other type of maps (e.g., hash maps) where bounded loop is hard or impossible to use, this helper provides a convenient way to operate on all elements. For callback_fn, besides map and map element, a callback_ctx, allocated on caller stack, is also passed to the callback function. This callback_ctx argument can provide additional input and allow to write to caller stack for output. If the callback_fn returns 0, the helper will iterate through next element if available. If the callback_fn returns 1, the helper will stop iterating and returns to the bpf program. Other return values are not used for now. Currently, this helper is only available with jit. It is possible to make it work with interpreter with so effort but I leave it as the future work. [1]: https://lore.kernel.org/bpf/20210122205415.113822-1-xiyou.wangcong@gmail.com/ Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210226204925.3884923-1-yhs@fb.com --- include/linux/bpf.h | 13 +++ include/linux/bpf_verifier.h | 3 + include/uapi/linux/bpf.h | 38 ++++++++ kernel/bpf/bpf_iter.c | 16 ++++ kernel/bpf/helpers.c | 2 + kernel/bpf/verifier.c | 208 ++++++++++++++++++++++++++++++++++++++--- kernel/trace/bpf_trace.c | 2 + tools/include/uapi/linux/bpf.h | 38 ++++++++ 8 files changed, 307 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1e4d2f60527..aeb1b93a4d75 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -39,6 +39,7 @@ struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; struct mem_cgroup; +struct bpf_func_state; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -129,6 +130,13 @@ struct bpf_map_ops { bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1); + + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@ -295,6 +303,8 @@ enum bpf_arg_type { ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, }; @@ -411,6 +421,8 @@ enum bpf_reg_type { PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ }; /* The information passed from prog-specific *_is_valid_access @@ -1887,6 +1899,7 @@ extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; +extern const struct bpf_func_proto bpf_for_each_map_elem_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 971b33aca13d..51c2ffa3d901 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -68,6 +68,8 @@ struct bpf_reg_state { unsigned long raw1; unsigned long raw2; } raw; + + u32 subprogno; /* for PTR_TO_FUNC */ }; /* For PTR_TO_PACKET, used to find other pointers with the same variable * offset, so they can share range knowledge. @@ -204,6 +206,7 @@ struct bpf_func_state { int acquired_refs; struct bpf_reference_state *refs; int allocated_stack; + bool in_callback_fn; struct bpf_stack_state *stack; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 79c893310492..b89af20cfa19 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -393,6 +393,15 @@ enum bpf_link_type { * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 +/* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ +#define BPF_PSEUDO_FUNC 4 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -3909,6 +3918,34 @@ union bpf_attr { * * **BPF_MTU_CHK_RET_FRAG_NEEDED** * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** * + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * The following are a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** return 0, the helper will continue to the next + * element. If return value is 1, the helper will skip the rest of + * elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4075,6 +4112,7 @@ union bpf_attr { FN(ima_inode_hash), \ FN(sock_from_file), \ FN(check_mtu), \ + FN(for_each_map_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index a0d9eade9c80..931870f9cf56 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) */ return ret == 0 ? 0 : -EAGAIN; } + +BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn, + void *, callback_ctx, u64, flags) +{ + return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags); +} + +const struct bpf_func_proto bpf_for_each_map_elem_proto = { + .func = bpf_for_each_map_elem, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_FUNC, + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 308427fe03a3..074800226327 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; + case BPF_FUNC_for_each_map_elem: + return &bpf_for_each_map_elem_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbdca49ac6cc..53afe9461b03 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn) insn->src_reg == BPF_PSEUDO_CALL; } +static bool bpf_pseudo_func(const struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; +} + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@ -248,6 +254,7 @@ struct bpf_call_arg_meta { u32 btf_id; struct btf *ret_btf; u32 ret_btf_id; + u32 subprogno; }; struct btf *btf_vmlinux; @@ -427,6 +434,7 @@ static bool reg_type_not_null(enum bpf_reg_type type) return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || + type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON; } @@ -469,7 +477,8 @@ static bool arg_type_may_be_null(enum bpf_arg_type type) type == ARG_PTR_TO_MEM_OR_NULL || type == ARG_PTR_TO_CTX_OR_NULL || type == ARG_PTR_TO_SOCKET_OR_NULL || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || + type == ARG_PTR_TO_STACK_OR_NULL; } /* Determine whether the function releases some resources allocated by another @@ -552,6 +561,8 @@ static const char * const reg_type_str[] = { [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", [PTR_TO_RDWR_BUF] = "rdwr_buf", [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", }; static char slot_type_char[] = { @@ -623,6 +634,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || + t == PTR_TO_MAP_KEY || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL) verbose(env, ",ks=%d,vs=%d", @@ -1555,6 +1567,19 @@ static int check_subprogs(struct bpf_verifier_env *env) /* determine subprog starts. The end is one before the next starts */ for (i = 0; i < insn_cnt; i++) { + if (bpf_pseudo_func(insn + i)) { + if (!env->bpf_capable) { + verbose(env, + "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); + return -EPERM; + } + ret = add_subprog(env, i + insn[i].imm + 1); + if (ret < 0) + return ret; + /* remember subprog */ + insn[i + 1].imm = ret; + continue; + } if (!bpf_pseudo_call(insn + i)) continue; if (!env->bpf_capable) { @@ -2286,6 +2311,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: case PTR_TO_MEM_OR_NULL: + case PTR_TO_FUNC: + case PTR_TO_MAP_KEY: return true; default: return false; @@ -2890,6 +2917,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno, reg = &cur_regs(env)[regno]; switch (reg->type) { + case PTR_TO_MAP_KEY: + verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n", + mem_size, off, size); + break; case PTR_TO_MAP_VALUE: verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size); @@ -3295,6 +3326,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, case PTR_TO_FLOW_KEYS: pointer_desc = "flow keys "; break; + case PTR_TO_MAP_KEY: + pointer_desc = "key "; + break; case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@ -3396,7 +3430,7 @@ process_func: continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { - if (!bpf_pseudo_call(insn + i)) + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ ret_insn[frame] = i + 1; @@ -3833,7 +3867,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn /* for access checks, reg->off is just part of off */ off += reg->off; - if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_KEY) { + if (t == BPF_WRITE) { + verbose(env, "write to change key R%d not allowed\n", regno); + return -EACCES; + } + + err = check_mem_region_access(env, regno, off, size, + reg->map_ptr->key_size, false); + if (err) + return err; + if (value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@ -4249,6 +4295,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); + case PTR_TO_MAP_KEY: + return check_mem_region_access(env, regno, reg->off, access_size, + reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: if (check_map_access_type(env, regno, reg->off, access_size, meta && meta->raw_mode ? BPF_WRITE : @@ -4465,6 +4514,7 @@ static const struct bpf_reg_types map_key_value_types = { PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@ -4496,6 +4546,7 @@ static const struct bpf_reg_types mem_types = { PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, PTR_TO_RDONLY_BUF, @@ -4508,6 +4559,7 @@ static const struct bpf_reg_types int_ptr_types = { PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@ -4520,6 +4572,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; +static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; +static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } }; static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@ -4548,6 +4602,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, + [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, }; static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@ -4729,6 +4785,8 @@ skip_type_check: verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_FUNC) { + meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. @@ -5375,6 +5433,35 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); } +static int set_map_elem_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map; + int err; + + if (bpf_map_ptr_poisoned(insn_aux)) { + verbose(env, "tail_call abusing map_ptr\n"); + return -EINVAL; + } + + map = BPF_MAP_PTR(insn_aux->map_ptr_state); + if (!map->ops->map_set_for_each_callback_args || + !map->ops->map_for_each_callback) { + verbose(env, "callback function not allowed for map\n"); + return -ENOTSUPP; + } + + err = map->ops->map_set_for_each_callback_args(env, caller, callee); + if (err) + return err; + + callee->in_callback_fn = true; + return 0; +} + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@ -5397,8 +5484,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) state->curframe--; caller = state->frame[state->curframe]; - /* return to the caller whatever r0 had in the callee */ - caller->regs[BPF_REG_0] = *r0; + if (callee->in_callback_fn) { + /* enforce R0 return value range [0, 1]. */ + struct tnum range = tnum_range(0, 1); + + if (r0->type != SCALAR_VALUE) { + verbose(env, "R0 not a scalar value\n"); + return -EACCES; + } + if (!tnum_in(range, r0->var_off)) { + verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + return -EINVAL; + } + } else { + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; + } /* Transfer references to the caller */ err = transfer_reference_state(caller, callee); @@ -5453,7 +5554,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem && - func_id != BPF_FUNC_map_peek_elem) + func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_for_each_map_elem) return 0; if (map == NULL) { @@ -5534,15 +5636,18 @@ static int check_reference_leak(struct bpf_verifier_env *env) return state->acquired_refs ? -EINVAL : 0; } -static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) +static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; + int insn_idx = *insn_idx_p; bool changes_data; - int i, err; + int i, err, func_id; /* find function prototype */ + func_id = insn->imm; if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); @@ -5638,6 +5743,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + if (func_id == BPF_FUNC_for_each_map_elem) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); + if (err < 0) + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@ -5891,6 +6003,19 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, else *ptr_limit = -off; return 0; + case PTR_TO_MAP_KEY: + /* Currently, this code is not exercised as the only use + * is bpf_for_each_map_elem() helper which requires + * bpf_capble. The code has been tested manually for + * future use. + */ + if (mask_to_left) { + *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + } else { + off = ptr_reg->smin_value + ptr_reg->off; + *ptr_limit = ptr_reg->map_ptr->key_size - off; + } + return 0; case PTR_TO_MAP_VALUE: if (mask_to_left) { *ptr_limit = ptr_reg->umax_value + ptr_reg->off; @@ -6092,6 +6217,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", @@ -8271,6 +8397,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } + if (insn->src_reg == BPF_PSEUDO_FUNC) { + struct bpf_prog_aux *aux = env->prog->aux; + u32 subprogno = insn[1].imm; + + if (!aux->func_info) { + verbose(env, "missing btf func_info\n"); + return -EINVAL; + } + if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { + verbose(env, "callback function not static\n"); + return -EINVAL; + } + + dst_reg->type = PTR_TO_FUNC; + dst_reg->subprogno = subprogno; + return 0; + } + map = env->used_maps[aux->map_index]; mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; @@ -8657,6 +8801,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env) struct bpf_insn *insns = env->prog->insnsi; int ret; + if (bpf_pseudo_func(insns + t)) + return visit_func_call_insn(t, insn_cnt, insns, env, true); + /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insns[t].code) != BPF_JMP && BPF_CLASS(insns[t].code) != BPF_JMP32) @@ -9277,6 +9424,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, */ return false; } + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. @@ -10123,10 +10271,9 @@ static int do_check(struct bpf_verifier_env *env) if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else - err = check_helper_call(env, insn->imm, env->insn_idx); + err = check_helper_call(env, insn, &env->insn_idx); if (err) return err; - } else if (opcode == BPF_JA) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || @@ -10555,6 +10702,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) goto next_insn; } + if (insn[0].src_reg == BPF_PSEUDO_FUNC) { + aux = &env->insn_aux_data[i]; + aux->ptr_type = PTR_TO_FUNC; + goto next_insn; + } + /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ @@ -10687,9 +10840,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) int insn_cnt = env->prog->len; int i; - for (i = 0; i < insn_cnt; i++, insn++) - if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) - insn->src_reg = 0; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + if (insn->src_reg == BPF_PSEUDO_FUNC) + continue; + insn->src_reg = 0; + } } /* single env->prog->insni[off] instruction was replaced with the range @@ -11330,6 +11487,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) return 0; for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + env->insn_aux_data[i].call_imm = insn->imm; + /* subprog is encoded in insn[1].imm */ + continue; + } + if (!bpf_pseudo_call(insn)) continue; /* Upon error here we cannot fall back to interpreter but @@ -11459,6 +11622,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) for (i = 0; i < env->subprog_cnt; i++) { insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { + if (bpf_pseudo_func(insn)) { + subprog = insn[1].imm; + insn[0].imm = (u32)(long)func[subprog]->bpf_func; + insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; + continue; + } if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; @@ -11504,6 +11673,11 @@ static int jit_subprogs(struct bpf_verifier_env *env) * later look the same as if they were interpreted only. */ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + insn[0].imm = env->insn_aux_data[i].call_imm; + insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + continue; + } if (!bpf_pseudo_call(insn)) continue; insn->off = env->insn_aux_data[i].call_imm; @@ -11568,6 +11742,14 @@ static int fixup_call_args(struct bpf_verifier_env *env) return -EINVAL; } for (i = 0; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* When JIT fails the progs with callback calls + * have to be rejected, since interpreter doesn't support them yet. + */ + verbose(env, "callbacks are not allowed in non-JITed programs\n"); + return -EINVAL; + } + if (!bpf_pseudo_call(insn)) continue; depth = get_callee_stack_depth(env, insn, i); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e9701744d8e4..0d23755c2747 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1371,6 +1371,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: return &bpf_task_storage_delete_proto; + case BPF_FUNC_for_each_map_elem: + return &bpf_for_each_map_elem_proto; default: return NULL; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 79c893310492..b89af20cfa19 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -393,6 +393,15 @@ enum bpf_link_type { * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 +/* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ +#define BPF_PSEUDO_FUNC 4 /* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@ -3909,6 +3918,34 @@ union bpf_attr { * * **BPF_MTU_CHK_RET_FRAG_NEEDED** * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** * + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * The following are a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** return 0, the helper will continue to the next + * element. If return value is 1, the helper will skip the rest of + * elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4075,6 +4112,7 @@ union bpf_attr { FN(ima_inode_hash), \ FN(sock_from_file), \ FN(check_mtu), \ + FN(for_each_map_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper -- cgit v1.2.3-59-g8ed1b From 7799e4d9d84f6f8231dfd9dca4da5f4b2f0aa932 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:33 -0800 Subject: bpf: Import syscall arg documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These descriptions are present in the man-pages project from the original submissions around 2015-2016. Import them so that they can be kept up to date as developers extend the bpf syscall commands. These descriptions follow the pattern used by scripts/bpf_helpers_doc.py so that we can take advantage of the parser to generate more up-to-date man page writing based upon these headers. Some minor wording adjustments were made to make the descriptions more consistent for the description / return format. Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-2-joe@cilium.io Co-authored-by: Alexei Starovoitov Co-authored-by: Michael Kerrisk --- include/uapi/linux/bpf.h | 122 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b89af20cfa19..fb16c590e6d9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -93,7 +93,127 @@ union bpf_iter_link_info { } map; }; -/* BPF syscall commands, see bpf(2) man-page for details. */ +/* BPF syscall commands, see bpf(2) man-page for more details. */ +/** + * DOC: eBPF Syscall Preamble + * + * The operation to be performed by the **bpf**\ () system call is determined + * by the *cmd* argument. Each operation takes an accompanying argument, + * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see + * below). The size argument is the size of the union pointed to by *attr*. + */ +/** + * DOC: eBPF Syscall Commands + * + * BPF_MAP_CREATE + * Description + * Create a map and return a file descriptor that refers to the + * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) + * is automatically enabled for the new file descriptor. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_MAP_CREATE** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_LOOKUP_ELEM + * Description + * Look up an element with a given *key* in the map referred to + * by the file descriptor *map_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_ELEM + * Description + * Create or update an element (key/value pair) in a specified map. + * + * The *flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create a new element or update an existing element. + * **BPF_NOEXIST** + * Create a new element only if it did not exist. + * **BPF_EXIST** + * Update an existing element. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, + * **E2BIG**, **EEXIST**, or **ENOENT**. + * + * **E2BIG** + * The number of elements in the map reached the + * *max_entries* limit specified at map creation time. + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_ELEM + * Description + * Look up and delete an element by key in a specified map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_GET_NEXT_KEY + * Description + * Look up an element by key in a specified map and return the key + * of the next element. Can be used to iterate over all elements + * in the map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * The following cases can be used to iterate over all elements of + * the map: + * + * * If *key* is not found, the operation returns zero and sets + * the *next_key* pointer to the key of the first element. + * * If *key* is found, the operation returns zero and sets the + * *next_key* pointer to the key of the next element. + * * If *key* is the last element, returns -1 and *errno* is set + * to **ENOENT**. + * + * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or + * **EINVAL** on error. + * + * BPF_PROG_LOAD + * Description + * Verify and load an eBPF program, returning a new file + * descriptor associated with the program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). + * + * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is + * automatically enabled for the new file descriptor. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * NOTES + * eBPF objects (maps and programs) can be shared between processes. + * For example, after **fork**\ (2), the child inherits file descriptors + * referring to the same eBPF objects. In addition, file descriptors + * referring to eBPF objects can be transferred over UNIX domain sockets. + * File descriptors referring to eBPF objects can be duplicated in the + * usual way, using **dup**\ (2) and similar calls. An eBPF object is + * deallocated only after all file descriptors referring to the object + * have been closed. + */ enum bpf_cmd { BPF_MAP_CREATE, BPF_MAP_LOOKUP_ELEM, -- cgit v1.2.3-59-g8ed1b From f67c9cbf6c581468f6c7144d497565cfc7918c31 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:34 -0800 Subject: bpf: Add minimal bpf() command documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce high-level descriptions of the intent and return codes of the bpf() syscall commands. Subsequent patches may further flesh out the content to provide a more useful programming reference. Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-3-joe@cilium.io --- include/uapi/linux/bpf.h | 368 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 368 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index fb16c590e6d9..052bbfe65f77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -204,6 +204,374 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_OBJ_PIN + * Description + * Pin an eBPF program or map referred by the specified *bpf_fd* + * to the provided *pathname* on the filesystem. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_OBJ_GET + * Description + * Open a file descriptor for the eBPF object pinned to the + * specified *pathname*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_PROG_ATTACH + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_DETACH + * Description + * Detach the eBPF program associated with the *target_fd* at the + * hook specified by *attach_type*. The program must have been + * previously attached using **BPF_PROG_ATTACH**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_TEST_RUN + * Description + * Run an eBPF program a number of times against a provided + * program context and return the modified program context and + * duration of the test run. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_GET_NEXT_ID + * Description + * Fetch the next eBPF program currently loaded into the kernel. + * + * Looks for the eBPF program with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF programs + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_GET_NEXT_ID + * Description + * Fetch the next eBPF map currently loaded into the kernel. + * + * Looks for the eBPF map with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF maps + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_PROG_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF program corresponding to + * *prog_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF map corresponding to + * *map_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_GET_INFO_BY_FD + * Description + * Obtain information about the eBPF object corresponding to + * *bpf_fd*. + * + * Populates up to *info_len* bytes of *info*, which will be in + * one of the following formats depending on the eBPF object type + * of *bpf_fd*: + * + * * **struct bpf_prog_info** + * * **struct bpf_map_info** + * * **struct bpf_btf_info** + * * **struct bpf_link_info** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_QUERY + * Description + * Obtain information about eBPF programs associated with the + * specified *attach_type* hook. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_RAW_TRACEPOINT_OPEN + * Description + * Attach an eBPF program to a tracepoint *name* to access kernel + * internal arguments of the tracepoint in their raw form. + * + * The *prog_fd* must be a valid file descriptor associated with + * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. + * + * No ABI guarantees are made about the content of tracepoint + * arguments exposed to the corresponding eBPF program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_LOAD + * Description + * Verify and load BPF Type Format (BTF) metadata into the kernel, + * returning a new file descriptor associated with the metadata. + * BTF is described in more detail at + * https://www.kernel.org/doc/html/latest/bpf/btf.html. + * + * The *btf* parameter must point to valid memory providing + * *btf_size* bytes of BTF binary metadata. + * + * The returned file descriptor can be passed to other **bpf**\ () + * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to + * associate the BTF with those objects. + * + * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional + * parameters to specify a *btf_log_buf*, *btf_log_size* and + * *btf_log_level* which allow the kernel to return freeform log + * output regarding the BTF verification process. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_GET_FD_BY_ID + * Description + * Open a file descriptor for the BPF Type Format (BTF) + * corresponding to *btf_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_TASK_FD_QUERY + * Description + * Obtain information about eBPF programs associated with the + * target process identified by *pid* and *fd*. + * + * If the *pid* and *fd* are associated with a tracepoint, kprobe + * or uprobe perf event, then the *prog_id* and *fd_type* will + * be populated with the eBPF program id and file descriptor type + * of type **bpf_task_fd_type**. If associated with a kprobe or + * uprobe, the *probe_offset* and *probe_addr* will also be + * populated. Optionally, if *buf* is provided, then up to + * *buf_len* bytes of *buf* will be populated with the name of + * the tracepoint, kprobe or uprobe. + * + * The resulting *prog_id* may be introspected in deeper detail + * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_LOOKUP_AND_DELETE_ELEM + * Description + * Look up an element with the given *key* in the map referred to + * by the file descriptor *fd*, and if found, delete the element. + * + * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types + * implement this command as a "pop" operation, deleting the top + * element rather than one corresponding to *key*. + * The *key* and *key_len* parameters should be zeroed when + * issuing this operation for these map types. + * + * This command is only valid for the following map types: + * * **BPF_MAP_TYPE_QUEUE** + * * **BPF_MAP_TYPE_STACK** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_FREEZE + * Description + * Freeze the permissions of the specified map. + * + * Write permissions may be frozen by passing zero *flags*. + * Upon success, no future syscall invocations may alter the + * map state of *map_fd*. Write operations from eBPF programs + * are still possible for a frozen map. + * + * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_BTF_GET_NEXT_ID + * Description + * Fetch the next BPF Type Format (BTF) object currently loaded + * into the kernel. + * + * Looks for the BTF object with an id greater than *start_id* + * and updates *next_id* on success. If no other BTF objects + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_LOOKUP_BATCH + * Description + * Iterate and fetch multiple elements in a map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_LOOKUP_AND_DELETE_BATCH + * Description + * Iterate and delete multiple elements in a map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_BATCH + * Description + * Iterate and update multiple elements in a map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_DELETE_BATCH + * Description + * Iterate and delete multiple elements in a map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_CREATE + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook and return a file descriptor handle for + * managing the link. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_UPDATE + * Description + * Update the eBPF program in the specified *link_fd* to + * *new_prog_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF Link corresponding to + * *link_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_GET_NEXT_ID + * Description + * Fetch the next eBPF link currently loaded into the kernel. + * + * Looks for the eBPF link with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF links + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_ENABLE_STATS + * Description + * Enable eBPF runtime statistics gathering. + * + * Runtime statistics gathering for the eBPF runtime is disabled + * by default to minimize the corresponding performance overhead. + * This command enables statistics globally. + * + * Multiple programs may independently enable statistics. + * After gathering the desired statistics, eBPF runtime statistics + * may be disabled again by calling **close**\ (2) for the file + * descriptor returned by this function. Statistics will only be + * disabled system-wide when all outstanding file descriptors + * returned by prior calls for this subcommand are closed. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_ITER_CREATE + * Description + * Create an iterator on top of the specified *link_fd* (as + * previously created using **BPF_LINK_CREATE**) and return a + * file descriptor that can be used to trigger the iteration. + * + * If the resulting file descriptor is pinned to the filesystem + * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls + * for that path will trigger the iterator to read kernel state + * using the eBPF program attached to *link_fd*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_DETACH + * Description + * Forcefully detach the specified *link_fd* from its + * corresponding attachment point. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_BIND_MAP + * Description + * Bind a map to the lifetime of an eBPF program. + * + * The map identified by *map_fd* is bound to the program + * identified by *prog_fd* and only released when *prog_fd* is + * released. This may be used in cases where metadata should be + * associated with a program which otherwise does not contain any + * references to the map (for example, embedded in the eBPF + * program instructions). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * For example, after **fork**\ (2), the child inherits file descriptors -- cgit v1.2.3-59-g8ed1b From 6690523bccb3e44cfcc4b2c995767e6814046e34 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:35 -0800 Subject: bpf: Document BPF_F_LOCK in syscall commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the meaning of the BPF_F_LOCK flag for the map lookup/update descriptions. Based on commit 96049f3afd50 ("bpf: introduce BPF_F_LOCK flag"). Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-4-joe@cilium.io --- include/uapi/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 052bbfe65f77..eb9f059f0569 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -123,6 +123,14 @@ union bpf_iter_link_info { * Look up an element with a given *key* in the map referred to * by the file descriptor *map_fd*. * + * The *flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -140,6 +148,8 @@ union bpf_iter_link_info { * Create a new element only if it did not exist. * **BPF_EXIST** * Update an existing element. + * **BPF_F_LOCK** + * Update a spin_lock-ed map element. * * Return * Returns zero on success. On error, -1 is returned and *errno* -- cgit v1.2.3-59-g8ed1b From 8aacb3c8d1a32b23c82645051bba55f0ae6c103b Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:36 -0800 Subject: bpf: Document BPF_PROG_PIN syscall command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b2197755b263 ("bpf: add support for persistent maps/progs") contains the original implementation and git logs, used as reference for this documentation. Also pull in the filename restriction as documented in commit 6d8cb045cde6 ("bpf: comment why dots in filenames under BPF virtual FS are not allowed") Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-5-joe@cilium.io --- include/uapi/linux/bpf.h | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eb9f059f0569..6946dde90c56 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -219,6 +219,22 @@ union bpf_iter_link_info { * Pin an eBPF program or map referred by the specified *bpf_fd* * to the provided *pathname* on the filesystem. * + * The *pathname* argument must not contain a dot ("."). + * + * On success, *pathname* retains a reference to the eBPF object, + * preventing deallocation of the object when the original + * *bpf_fd* is closed. This allow the eBPF object to live beyond + * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent + * process. + * + * Applying **unlink**\ (2) or similar calls to the *pathname* + * unpins the object from the filesystem, removing the reference. + * If no other file descriptors or filesystem nodes refer to the + * same object, it will be deallocated (see NOTES). + * + * The filesystem type for the parent directory of *pathname* must + * be **BPF_FS_MAGIC**. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. @@ -584,13 +600,19 @@ union bpf_iter_link_info { * * NOTES * eBPF objects (maps and programs) can be shared between processes. - * For example, after **fork**\ (2), the child inherits file descriptors - * referring to the same eBPF objects. In addition, file descriptors - * referring to eBPF objects can be transferred over UNIX domain sockets. - * File descriptors referring to eBPF objects can be duplicated in the - * usual way, using **dup**\ (2) and similar calls. An eBPF object is - * deallocated only after all file descriptors referring to the object - * have been closed. + * + * * After **fork**\ (2), the child inherits file descriptors + * referring to the same eBPF objects. + * * File descriptors referring to eBPF objects can be transferred over + * **unix**\ (7) domain sockets. + * * File descriptors referring to eBPF objects can be duplicated in the + * usual way, using **dup**\ (2) and similar calls. + * * File descriptors referring to eBPF objects can be pinned to the + * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). + * + * An eBPF object is deallocated only after all file descriptors referring + * to the object have been closed and no references remain pinned to the + * filesystem or attached (for example, bound to a program or device). */ enum bpf_cmd { BPF_MAP_CREATE, -- cgit v1.2.3-59-g8ed1b From 32e76b187a90de5809d68c2ef3e3964176dacaf0 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:37 -0800 Subject: bpf: Document BPF_PROG_ATTACH syscall command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document the prog attach command in more detail, based on git commits: * commit f4324551489e ("bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands") * commit 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") * commit f4364dcfc86d ("media: rc: introduce BPF_PROG_LIRC_MODE2") * commit d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-6-joe@cilium.io --- include/uapi/linux/bpf.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6946dde90c56..a8f2964ec885 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -253,6 +253,43 @@ union bpf_iter_link_info { * Attach an eBPF program to a *target_fd* at the specified * *attach_type* hook. * + * The *attach_type* specifies the eBPF attachment point to + * attach the program to, and must be one of *bpf_attach_type* + * (see below). + * + * The *attach_bpf_fd* must be a valid file descriptor for a + * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap + * or sock_ops type corresponding to the specified *attach_type*. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_TYPE_SK_SKB**, + * **BPF_PROG_TYPE_SK_MSG** + * + * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. -- cgit v1.2.3-59-g8ed1b From 2a3fdca4e3bc7a01316277ba26f4090c4b19bf7c Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:38 -0800 Subject: bpf: Document BPF_PROG_TEST_RUN syscall command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on a brief read of the corresponding source code. Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-7-joe@cilium.io --- include/uapi/linux/bpf.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a8f2964ec885..a6cd6650e23d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -306,14 +306,22 @@ union bpf_iter_link_info { * * BPF_PROG_TEST_RUN * Description - * Run an eBPF program a number of times against a provided - * program context and return the modified program context and - * duration of the test run. + * Run the eBPF program associated with the *prog_fd* a *repeat* + * number of times against a provided program context *ctx_in* and + * data *data_in*, and return the modified program context + * *ctx_out*, *data_out* (for example, packet data), result of the + * execution *retval*, and *duration* of the test run. * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * **ENOSPC** + * Either *data_size_out* or *ctx_size_out* is too small. + * **ENOTSUPP** + * This command is not supported by the program type of + * the program referred to by *prog_fd*. + * * BPF_PROG_GET_NEXT_ID * Description * Fetch the next eBPF program currently loaded into the kernel. -- cgit v1.2.3-59-g8ed1b From 5d999994e05d62d4f53059540652014cf83cddfe Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:39 -0800 Subject: bpf: Document BPF_PROG_QUERY syscall command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 468e2f64d220 ("bpf: introduce BPF_PROG_QUERY command") originally introduced this, but there have been several additions since then. Unlike BPF_PROG_ATTACH, it appears that the sockmap progs are not able to be queried so far. Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-8-joe@cilium.io --- include/uapi/linux/bpf.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a6cd6650e23d..0cf92ef011f1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -389,6 +389,43 @@ union bpf_iter_link_info { * Obtain information about eBPF programs associated with the * specified *attach_type* hook. * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_QUERY** always fetches the number of programs + * attached and the *attach_flags* which were used to attach those + * programs. Additionally, if *prog_ids* is nonzero and the number + * of attached programs is less than *prog_cnt*, populates + * *prog_ids* with the eBPF program ids of the programs attached + * at *target_fd*. + * + * The following flags may alter the result: + * + * **BPF_F_QUERY_EFFECTIVE** + * Only return information regarding programs which are + * currently effective at the specified *target_fd*. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. -- cgit v1.2.3-59-g8ed1b From 0cb804547927c05f6aa7e28c8d4a1e02fec1a6d4 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:40 -0800 Subject: bpf: Document BPF_MAP_*_BATCH syscall commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based roughly on the following commits: * Commit cb4d03ab499d ("bpf: Add generic support for lookup batch op") * Commit 057996380a42 ("bpf: Add batch ops to all htab bpf map") * Commit aa2e93b8e58e ("bpf: Add generic support for update and delete batch ops") Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Acked-by: Brian Vazquez Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210302171947.2268128-9-joe@cilium.io --- include/uapi/linux/bpf.h | 114 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0cf92ef011f1..c8b9d19fce22 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -553,13 +553,55 @@ union bpf_iter_link_info { * Description * Iterate and fetch multiple elements in a map. * + * Two opaque values are used to manage batch operations, + * *in_batch* and *out_batch*. Initially, *in_batch* must be set + * to NULL to begin the batched operation. After each subsequent + * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant + * *out_batch* as the *in_batch* for the next operation to + * continue iteration from the current point. + * + * The *keys* and *values* are output parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are copied into the + * user buffer, with the keys copied into *keys* and the values + * copied into the corresponding indices in *values*. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * May set *errno* to **ENOSPC** to indicate that *keys* or + * *values* is too small to dump an entire bucket during + * iteration of a hash-based map type. + * * BPF_MAP_LOOKUP_AND_DELETE_BATCH * Description - * Iterate and delete multiple elements in a map. + * Iterate and delete all elements in a map. + * + * This operation has the same behavior as + * **BPF_MAP_LOOKUP_BATCH** with two exceptions: + * + * * Every element that is successfully returned is also deleted + * from the map. This is at least *count* elements. Note that + * *count* is both an input and an output parameter. + * * Upon returning with *errno* set to **EFAULT**, up to + * *count* elements may be deleted without returning the keys + * and values of the deleted elements. * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -567,15 +609,81 @@ union bpf_iter_link_info { * * BPF_MAP_UPDATE_BATCH * Description - * Iterate and update multiple elements in a map. + * Update multiple elements in a map by *key*. + * + * The *keys* and *values* are input parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * Each element specified in *keys* is sequentially updated to the + * value in the corresponding index in *values*. The *in_batch* + * and *out_batch* parameters are ignored and should be zeroed. + * + * The *elem_flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create new elements or update a existing elements. + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * **BPF_EXIST** + * Update existing elements. + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. * * Return * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or + * **E2BIG**. **E2BIG** indicates that the number of elements in + * the map reached the *max_entries* limit specified at map + * creation time. + * + * May set *errno* to one of the following error codes under + * specific circumstances: + * + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * * BPF_MAP_DELETE_BATCH * Description - * Iterate and delete multiple elements in a map. + * Delete multiple elements in a map by *key*. + * + * The *keys* parameter is an input parameter which must point + * to memory large enough to hold *count* items based on the key + * size of the map *map_fd*, that is, *key_size* * *count*. + * + * Each element specified in *keys* is sequentially deleted. The + * *in_batch*, *out_batch*, and *values* parameters are ignored + * and should be zeroed. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. If + * *errno* is **EFAULT**, up to *count* elements may be been + * deleted. * * Return * Returns zero on success. On error, -1 is returned and *errno* -- cgit v1.2.3-59-g8ed1b From 923a932c982fd71856f80dbeaaa3ca41a75e89e0 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 2 Mar 2021 09:19:41 -0800 Subject: scripts/bpf: Abstract eBPF API target parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Abstract out the target parameter so that upcoming commits, more than just the existing "helpers" target can be called to generate specific portions of docs from the eBPF UAPI headers. Signed-off-by: Joe Stringer Signed-off-by: Alexei Starovoitov Reviewed-by: Quentin Monnet Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210302171947.2268128-10-joe@cilium.io --- MAINTAINERS | 1 + include/uapi/linux/bpf.h | 2 +- scripts/bpf_doc.py | 650 +++++++++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 615 -------------------------------------- tools/bpf/Makefile.helpers | 2 +- tools/include/uapi/linux/bpf.h | 2 +- tools/lib/bpf/Makefile | 2 +- tools/perf/MANIFEST | 2 +- 8 files changed, 656 insertions(+), 620 deletions(-) create mode 100755 scripts/bpf_doc.py delete mode 100755 scripts/bpf_helpers_doc.py (limited to 'include/uapi/linux/bpf.h') diff --git a/MAINTAINERS b/MAINTAINERS index a50a543e3c81..8d56c7044067 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3223,6 +3223,7 @@ F: net/core/filter.c F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ +F: scripts/bpf_doc.py F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c8b9d19fce22..63a56ed6a785 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1439,7 +1439,7 @@ union bpf_attr { * parsed and used to produce a manual page. The workflow is the following, * and requires the rst2man utility: * - * $ ./scripts/bpf_helpers_doc.py \ + * $ ./scripts/bpf_doc.py \ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 * $ man /tmp/bpf-helpers.7 diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py new file mode 100755 index 000000000000..5a4f68aab335 --- /dev/null +++ b/scripts/bpf_doc.py @@ -0,0 +1,650 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2018-2019 Netronome Systems, Inc. +# Copyright (C) 2021 Isovalent, Inc. + +# In case user attempts to run with Python 2. +from __future__ import print_function + +import argparse +import re +import sys, os + +class NoHelperFound(BaseException): + pass + +class ParsingError(BaseException): + def __init__(self, line='', reader=None): + if reader: + BaseException.__init__(self, + 'Error at file offset %d, parsing line: %s' % + (reader.tell(), line)) + else: + BaseException.__init__(self, 'Error parsing line: %s' % line) + +class Helper(object): + """ + An object representing the description of an eBPF helper function. + @proto: function prototype of the helper function + @desc: textual description of the helper function + @ret: description of the return value of the helper function + """ + def __init__(self, proto='', desc='', ret=''): + self.proto = proto + self.desc = desc + self.ret = ret + + def proto_break_down(self): + """ + Break down helper function protocol into smaller chunks: return type, + name, distincts arguments. + """ + arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$') + res = {} + proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') + + capture = proto_re.match(self.proto) + res['ret_type'] = capture.group(1) + res['ret_star'] = capture.group(2) + res['name'] = capture.group(3) + res['args'] = [] + + args = capture.group(4).split(', ') + for a in args: + capture = arg_re.match(a) + res['args'].append({ + 'type' : capture.group(1), + 'star' : capture.group(5), + 'name' : capture.group(6) + }) + + return res + +class HeaderParser(object): + """ + An object used to parse a file in order to extract the documentation of a + list of eBPF helper functions. All the helpers that can be retrieved are + stored as Helper object, in the self.helpers() array. + @filename: name of file to parse, usually include/uapi/linux/bpf.h in the + kernel tree + """ + def __init__(self, filename): + self.reader = open(filename, 'r') + self.line = '' + self.helpers = [] + + def parse_helper(self): + proto = self.parse_proto() + desc = self.parse_desc() + ret = self.parse_ret() + return Helper(proto=proto, desc=desc, ret=ret) + + def parse_proto(self): + # Argument can be of shape: + # - "void" + # - "type name" + # - "type *name" + # - Same as above, with "const" and/or "struct" in front of type + # - "..." (undefined number of arguments, for bpf_trace_printk()) + # There is at least one term ("void"), and at most five arguments. + p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$') + capture = p.match(self.line) + if not capture: + raise NoHelperFound + self.line = self.reader.readline() + return capture.group(1) + + def parse_desc(self): + p = re.compile(' \* ?(?:\t| {5,8})Description$') + capture = p.match(self.line) + if not capture: + # Helper can have empty description and we might be parsing another + # attribute: return but do not consume. + return '' + # Description can be several lines, some of them possibly empty, and it + # stops when another subsection title is met. + desc = '' + while True: + self.line = self.reader.readline() + if self.line == ' *\n': + desc += '\n' + else: + p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') + capture = p.match(self.line) + if capture: + desc += capture.group(1) + '\n' + else: + break + return desc + + def parse_ret(self): + p = re.compile(' \* ?(?:\t| {5,8})Return$') + capture = p.match(self.line) + if not capture: + # Helper can have empty retval and we might be parsing another + # attribute: return but do not consume. + return '' + # Return value description can be several lines, some of them possibly + # empty, and it stops when another subsection title is met. + ret = '' + while True: + self.line = self.reader.readline() + if self.line == ' *\n': + ret += '\n' + else: + p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') + capture = p.match(self.line) + if capture: + ret += capture.group(1) + '\n' + else: + break + return ret + + def run(self): + # Advance to start of helper function descriptions. + offset = self.reader.read().find('* Start of BPF helper function descriptions:') + if offset == -1: + raise Exception('Could not find start of eBPF helper descriptions list') + self.reader.seek(offset) + self.reader.readline() + self.reader.readline() + self.line = self.reader.readline() + + while True: + try: + helper = self.parse_helper() + self.helpers.append(helper) + except NoHelperFound: + break + + self.reader.close() + +############################################################################### + +class Printer(object): + """ + A generic class for printers. Printers should be created with an array of + Helper objects, and implement a way to print them in the desired fashion. + @parser: A HeaderParser with objects to print to standard output + """ + def __init__(self, parser): + self.parser = parser + self.elements = [] + + def print_header(self): + pass + + def print_footer(self): + pass + + def print_one(self, helper): + pass + + def print_all(self): + self.print_header() + for elem in self.elements: + self.print_one(elem) + self.print_footer() + + +class PrinterRST(Printer): + """ + A generic class for printers that print ReStructured Text. Printers should + be created with a HeaderParser object, and implement a way to print API + elements in the desired fashion. + @parser: A HeaderParser with objects to print to standard output + """ + def __init__(self, parser): + self.parser = parser + + def print_license(self): + license = '''\ +.. Copyright (C) All BPF authors and contributors from 2014 to present. +.. See git log include/uapi/linux/bpf.h in kernel tree for details. +.. +.. %%%LICENSE_START(VERBATIM) +.. Permission is granted to make and distribute verbatim copies of this +.. manual provided the copyright notice and this permission notice are +.. preserved on all copies. +.. +.. Permission is granted to copy and distribute modified versions of this +.. manual under the conditions for verbatim copying, provided that the +.. entire resulting derived work is distributed under the terms of a +.. permission notice identical to this one. +.. +.. Since the Linux kernel and libraries are constantly changing, this +.. manual page may be incorrect or out-of-date. The author(s) assume no +.. responsibility for errors or omissions, or for damages resulting from +.. the use of the information contained herein. The author(s) may not +.. have taken the same level of care in the production of this manual, +.. which is licensed free of charge, as they might when working +.. professionally. +.. +.. Formatted or processed versions of this manual, if unaccompanied by +.. the source, must acknowledge the copyright and authors of this work. +.. %%%LICENSE_END +.. +.. Please do not edit this file. It was generated from the documentation +.. located in file include/uapi/linux/bpf.h of the Linux kernel sources +.. (helpers description), and from scripts/bpf_doc.py in the same +.. repository (header and footer). +''' + print(license) + + def print_elem(self, elem): + if (elem.desc): + print('\tDescription') + # Do not strip all newline characters: formatted code at the end of + # a section must be followed by a blank line. + for line in re.sub('\n$', '', elem.desc, count=1).split('\n'): + print('{}{}'.format('\t\t' if line else '', line)) + + if (elem.ret): + print('\tReturn') + for line in elem.ret.rstrip().split('\n'): + print('{}{}'.format('\t\t' if line else '', line)) + + print('') + + +class PrinterHelpersRST(PrinterRST): + """ + A printer for dumping collected information about helpers as a ReStructured + Text page compatible with the rst2man program, which can be used to + generate a manual page for the helpers. + @parser: A HeaderParser with Helper objects to print to standard output + """ + def __init__(self, parser): + self.elements = parser.helpers + + def print_header(self): + header = '''\ +=========== +BPF-HELPERS +=========== +------------------------------------------------------------------------------- +list of eBPF helper functions +------------------------------------------------------------------------------- + +:Manual section: 7 + +DESCRIPTION +=========== + +The extended Berkeley Packet Filter (eBPF) subsystem consists in programs +written in a pseudo-assembly language, then attached to one of the several +kernel hooks and run in reaction of specific events. This framework differs +from the older, "classic" BPF (or "cBPF") in several aspects, one of them being +the ability to call special functions (or "helpers") from within a program. +These functions are restricted to a white-list of helpers defined in the +kernel. + +These helpers are used by eBPF programs to interact with the system, or with +the context in which they work. For instance, they can be used to print +debugging messages, to get the time since the system was booted, to interact +with eBPF maps, or to manipulate network packets. Since there are several eBPF +program types, and that they do not run in the same context, each program type +can only call a subset of those helpers. + +Due to eBPF conventions, a helper can not have more than five arguments. + +Internally, eBPF programs call directly into the compiled helper functions +without requiring any foreign-function interface. As a result, calling helpers +introduces no overhead, thus offering excellent performance. + +This document is an attempt to list and document the helpers available to eBPF +developers. They are sorted by chronological order (the oldest helpers in the +kernel at the top). + +HELPERS +======= +''' + PrinterRST.print_license(self) + print(header) + + def print_footer(self): + footer = ''' +EXAMPLES +======== + +Example usage for most of the eBPF helpers listed in this manual page are +available within the Linux kernel sources, at the following locations: + +* *samples/bpf/* +* *tools/testing/selftests/bpf/* + +LICENSE +======= + +eBPF programs can have an associated license, passed along with the bytecode +instructions to the kernel when the programs are loaded. The format for that +string is identical to the one in use for kernel modules (Dual licenses, such +as "Dual BSD/GPL", may be used). Some helper functions are only accessible to +programs that are compatible with the GNU Privacy License (GPL). + +In order to use such helpers, the eBPF program must be loaded with the correct +license string passed (via **attr**) to the **bpf**\ () system call, and this +generally translates into the C source code of the program containing a line +similar to the following: + +:: + + char ____license[] __attribute__((section("license"), used)) = "GPL"; + +IMPLEMENTATION +============== + +This manual page is an effort to document the existing eBPF helper functions. +But as of this writing, the BPF sub-system is under heavy development. New eBPF +program or map types are added, along with new helper functions. Some helpers +are occasionally made available for additional program types. So in spite of +the efforts of the community, this page might not be up-to-date. If you want to +check by yourself what helper functions exist in your kernel, or what types of +programs they can support, here are some files among the kernel tree that you +may be interested in: + +* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list + of all helper functions, as well as many other BPF definitions including most + of the flags, structs or constants used by the helpers. +* *net/core/filter.c* contains the definition of most network-related helper + functions, and the list of program types from which they can be used. +* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related + helpers. +* *kernel/bpf/verifier.c* contains the functions used to check that valid types + of eBPF maps are used with a given helper function. +* *kernel/bpf/* directory contains other files in which additional helpers are + defined (for cgroups, sockmaps, etc.). +* The bpftool utility can be used to probe the availability of helper functions + on the system (as well as supported program and map types, and a number of + other parameters). To do so, run **bpftool feature probe** (see + **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to + list features available to unprivileged users. + +Compatibility between helper functions and program types can generally be found +in the files where helper functions are defined. Look for the **struct +bpf_func_proto** objects and for functions returning them: these functions +contain a list of helpers that a given program type can call. Note that the +**default:** label of the **switch ... case** used to filter helpers can call +other functions, themselves allowing access to additional helpers. The +requirement for GPL license is also in those **struct bpf_func_proto**. + +Compatibility between helper functions and map types can be found in the +**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*. + +Helper functions that invalidate the checks on **data** and **data_end** +pointers for network processing are listed in function +**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*. + +SEE ALSO +======== + +**bpf**\ (2), +**bpftool**\ (8), +**cgroups**\ (7), +**ip**\ (8), +**perf_event_open**\ (2), +**sendmsg**\ (2), +**socket**\ (7), +**tc-bpf**\ (8)''' + print(footer) + + def print_proto(self, helper): + """ + Format function protocol with bold and italics markers. This makes RST + file less readable, but gives nice results in the manual page. + """ + proto = helper.proto_break_down() + + print('**%s %s%s(' % (proto['ret_type'], + proto['ret_star'].replace('*', '\\*'), + proto['name']), + end='') + + comma = '' + for a in proto['args']: + one_arg = '{}{}'.format(comma, a['type']) + if a['name']: + if a['star']: + one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*')) + else: + one_arg += '** ' + one_arg += '*{}*\\ **'.format(a['name']) + comma = ', ' + print(one_arg, end='') + + print(')**') + + def print_one(self, helper): + self.print_proto(helper) + self.print_elem(helper) + + + + +class PrinterHelpers(Printer): + """ + A printer for dumping collected information about helpers as C header to + be included from BPF program. + @parser: A HeaderParser with Helper objects to print to standard output + """ + def __init__(self, parser): + self.elements = parser.helpers + + type_fwds = [ + 'struct bpf_fib_lookup', + 'struct bpf_sk_lookup', + 'struct bpf_perf_event_data', + 'struct bpf_perf_event_value', + 'struct bpf_pidns_info', + 'struct bpf_redir_neigh', + 'struct bpf_sock', + 'struct bpf_sock_addr', + 'struct bpf_sock_ops', + 'struct bpf_sock_tuple', + 'struct bpf_spin_lock', + 'struct bpf_sysctl', + 'struct bpf_tcp_sock', + 'struct bpf_tunnel_key', + 'struct bpf_xfrm_state', + 'struct linux_binprm', + 'struct pt_regs', + 'struct sk_reuseport_md', + 'struct sockaddr', + 'struct tcphdr', + 'struct seq_file', + 'struct tcp6_sock', + 'struct tcp_sock', + 'struct tcp_timewait_sock', + 'struct tcp_request_sock', + 'struct udp6_sock', + 'struct task_struct', + + 'struct __sk_buff', + 'struct sk_msg_md', + 'struct xdp_md', + 'struct path', + 'struct btf_ptr', + 'struct inode', + 'struct socket', + 'struct file', + ] + known_types = { + '...', + 'void', + 'const void', + 'char', + 'const char', + 'int', + 'long', + 'unsigned long', + + '__be16', + '__be32', + '__wsum', + + 'struct bpf_fib_lookup', + 'struct bpf_perf_event_data', + 'struct bpf_perf_event_value', + 'struct bpf_pidns_info', + 'struct bpf_redir_neigh', + 'struct bpf_sk_lookup', + 'struct bpf_sock', + 'struct bpf_sock_addr', + 'struct bpf_sock_ops', + 'struct bpf_sock_tuple', + 'struct bpf_spin_lock', + 'struct bpf_sysctl', + 'struct bpf_tcp_sock', + 'struct bpf_tunnel_key', + 'struct bpf_xfrm_state', + 'struct linux_binprm', + 'struct pt_regs', + 'struct sk_reuseport_md', + 'struct sockaddr', + 'struct tcphdr', + 'struct seq_file', + 'struct tcp6_sock', + 'struct tcp_sock', + 'struct tcp_timewait_sock', + 'struct tcp_request_sock', + 'struct udp6_sock', + 'struct task_struct', + 'struct path', + 'struct btf_ptr', + 'struct inode', + 'struct socket', + 'struct file', + } + mapped_types = { + 'u8': '__u8', + 'u16': '__u16', + 'u32': '__u32', + 'u64': '__u64', + 's8': '__s8', + 's16': '__s16', + 's32': '__s32', + 's64': '__s64', + 'size_t': 'unsigned long', + 'struct bpf_map': 'void', + 'struct sk_buff': 'struct __sk_buff', + 'const struct sk_buff': 'const struct __sk_buff', + 'struct sk_msg_buff': 'struct sk_msg_md', + 'struct xdp_buff': 'struct xdp_md', + } + # Helpers overloaded for different context types. + overloaded_helpers = [ + 'bpf_get_socket_cookie', + 'bpf_sk_assign', + ] + + def print_header(self): + header = '''\ +/* This is auto-generated file. See bpf_doc.py for details. */ + +/* Forward declarations of BPF structs */''' + + print(header) + for fwd in self.type_fwds: + print('%s;' % fwd) + print('') + + def print_footer(self): + footer = '' + print(footer) + + def map_type(self, t): + if t in self.known_types: + return t + if t in self.mapped_types: + return self.mapped_types[t] + print("Unrecognized type '%s', please add it to known types!" % t, + file=sys.stderr) + sys.exit(1) + + seen_helpers = set() + + def print_one(self, helper): + proto = helper.proto_break_down() + + if proto['name'] in self.seen_helpers: + return + self.seen_helpers.add(proto['name']) + + print('/*') + print(" * %s" % proto['name']) + print(" *") + if (helper.desc): + # Do not strip all newline characters: formatted code at the end of + # a section must be followed by a blank line. + for line in re.sub('\n$', '', helper.desc, count=1).split('\n'): + print(' *{}{}'.format(' \t' if line else '', line)) + + if (helper.ret): + print(' *') + print(' * Returns') + for line in helper.ret.rstrip().split('\n'): + print(' *{}{}'.format(' \t' if line else '', line)) + + print(' */') + print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']), + proto['ret_star'], proto['name']), end='') + comma = '' + for i, a in enumerate(proto['args']): + t = a['type'] + n = a['name'] + if proto['name'] in self.overloaded_helpers and i == 0: + t = 'void' + n = 'ctx' + one_arg = '{}{}'.format(comma, self.map_type(t)) + if n: + if a['star']: + one_arg += ' {}'.format(a['star']) + else: + one_arg += ' ' + one_arg += '{}'.format(n) + comma = ', ' + print(one_arg, end='') + + print(') = (void *) %d;' % len(self.seen_helpers)) + print('') + +############################################################################### + +# If script is launched from scripts/ from kernel tree and can access +# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse, +# otherwise the --filename argument will be required from the command line. +script = os.path.abspath(sys.argv[0]) +linuxRoot = os.path.dirname(os.path.dirname(script)) +bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h') + +printers = { + 'helpers': PrinterHelpersRST, +} + +argParser = argparse.ArgumentParser(description=""" +Parse eBPF header file and generate documentation for the eBPF API. +The RST-formatted output produced can be turned into a manual page with the +rst2man utility. +""") +argParser.add_argument('--header', action='store_true', + help='generate C header file') +if (os.path.isfile(bpfh)): + argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h', + default=bpfh) +else: + argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h') +argParser.add_argument('target', nargs='?', default='helpers', + choices=printers.keys(), help='eBPF API target') +args = argParser.parse_args() + +# Parse file. +headerParser = HeaderParser(args.filename) +headerParser.run() + +# Print formatted output to standard output. +if args.header: + printer = PrinterHelpers(headerParser) +else: + printer = printers[args.target](headerParser) +printer.print_all() diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py deleted file mode 100755 index 867ada23281c..000000000000 --- a/scripts/bpf_helpers_doc.py +++ /dev/null @@ -1,615 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# -# Copyright (C) 2018-2019 Netronome Systems, Inc. - -# In case user attempts to run with Python 2. -from __future__ import print_function - -import argparse -import re -import sys, os - -class NoHelperFound(BaseException): - pass - -class ParsingError(BaseException): - def __init__(self, line='', reader=None): - if reader: - BaseException.__init__(self, - 'Error at file offset %d, parsing line: %s' % - (reader.tell(), line)) - else: - BaseException.__init__(self, 'Error parsing line: %s' % line) - -class Helper(object): - """ - An object representing the description of an eBPF helper function. - @proto: function prototype of the helper function - @desc: textual description of the helper function - @ret: description of the return value of the helper function - """ - def __init__(self, proto='', desc='', ret=''): - self.proto = proto - self.desc = desc - self.ret = ret - - def proto_break_down(self): - """ - Break down helper function protocol into smaller chunks: return type, - name, distincts arguments. - """ - arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$') - res = {} - proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$') - - capture = proto_re.match(self.proto) - res['ret_type'] = capture.group(1) - res['ret_star'] = capture.group(2) - res['name'] = capture.group(3) - res['args'] = [] - - args = capture.group(4).split(', ') - for a in args: - capture = arg_re.match(a) - res['args'].append({ - 'type' : capture.group(1), - 'star' : capture.group(5), - 'name' : capture.group(6) - }) - - return res - -class HeaderParser(object): - """ - An object used to parse a file in order to extract the documentation of a - list of eBPF helper functions. All the helpers that can be retrieved are - stored as Helper object, in the self.helpers() array. - @filename: name of file to parse, usually include/uapi/linux/bpf.h in the - kernel tree - """ - def __init__(self, filename): - self.reader = open(filename, 'r') - self.line = '' - self.helpers = [] - - def parse_helper(self): - proto = self.parse_proto() - desc = self.parse_desc() - ret = self.parse_ret() - return Helper(proto=proto, desc=desc, ret=ret) - - def parse_proto(self): - # Argument can be of shape: - # - "void" - # - "type name" - # - "type *name" - # - Same as above, with "const" and/or "struct" in front of type - # - "..." (undefined number of arguments, for bpf_trace_printk()) - # There is at least one term ("void"), and at most five arguments. - p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$') - capture = p.match(self.line) - if not capture: - raise NoHelperFound - self.line = self.reader.readline() - return capture.group(1) - - def parse_desc(self): - p = re.compile(' \* ?(?:\t| {5,8})Description$') - capture = p.match(self.line) - if not capture: - # Helper can have empty description and we might be parsing another - # attribute: return but do not consume. - return '' - # Description can be several lines, some of them possibly empty, and it - # stops when another subsection title is met. - desc = '' - while True: - self.line = self.reader.readline() - if self.line == ' *\n': - desc += '\n' - else: - p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') - capture = p.match(self.line) - if capture: - desc += capture.group(1) + '\n' - else: - break - return desc - - def parse_ret(self): - p = re.compile(' \* ?(?:\t| {5,8})Return$') - capture = p.match(self.line) - if not capture: - # Helper can have empty retval and we might be parsing another - # attribute: return but do not consume. - return '' - # Return value description can be several lines, some of them possibly - # empty, and it stops when another subsection title is met. - ret = '' - while True: - self.line = self.reader.readline() - if self.line == ' *\n': - ret += '\n' - else: - p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') - capture = p.match(self.line) - if capture: - ret += capture.group(1) + '\n' - else: - break - return ret - - def run(self): - # Advance to start of helper function descriptions. - offset = self.reader.read().find('* Start of BPF helper function descriptions:') - if offset == -1: - raise Exception('Could not find start of eBPF helper descriptions list') - self.reader.seek(offset) - self.reader.readline() - self.reader.readline() - self.line = self.reader.readline() - - while True: - try: - helper = self.parse_helper() - self.helpers.append(helper) - except NoHelperFound: - break - - self.reader.close() - -############################################################################### - -class Printer(object): - """ - A generic class for printers. Printers should be created with an array of - Helper objects, and implement a way to print them in the desired fashion. - @helpers: array of Helper objects to print to standard output - """ - def __init__(self, helpers): - self.helpers = helpers - - def print_header(self): - pass - - def print_footer(self): - pass - - def print_one(self, helper): - pass - - def print_all(self): - self.print_header() - for helper in self.helpers: - self.print_one(helper) - self.print_footer() - -class PrinterRST(Printer): - """ - A printer for dumping collected information about helpers as a ReStructured - Text page compatible with the rst2man program, which can be used to - generate a manual page for the helpers. - @helpers: array of Helper objects to print to standard output - """ - def print_header(self): - header = '''\ -.. Copyright (C) All BPF authors and contributors from 2014 to present. -.. See git log include/uapi/linux/bpf.h in kernel tree for details. -.. -.. %%%LICENSE_START(VERBATIM) -.. Permission is granted to make and distribute verbatim copies of this -.. manual provided the copyright notice and this permission notice are -.. preserved on all copies. -.. -.. Permission is granted to copy and distribute modified versions of this -.. manual under the conditions for verbatim copying, provided that the -.. entire resulting derived work is distributed under the terms of a -.. permission notice identical to this one. -.. -.. Since the Linux kernel and libraries are constantly changing, this -.. manual page may be incorrect or out-of-date. The author(s) assume no -.. responsibility for errors or omissions, or for damages resulting from -.. the use of the information contained herein. The author(s) may not -.. have taken the same level of care in the production of this manual, -.. which is licensed free of charge, as they might when working -.. professionally. -.. -.. Formatted or processed versions of this manual, if unaccompanied by -.. the source, must acknowledge the copyright and authors of this work. -.. %%%LICENSE_END -.. -.. Please do not edit this file. It was generated from the documentation -.. located in file include/uapi/linux/bpf.h of the Linux kernel sources -.. (helpers description), and from scripts/bpf_helpers_doc.py in the same -.. repository (header and footer). - -=========== -BPF-HELPERS -=========== -------------------------------------------------------------------------------- -list of eBPF helper functions -------------------------------------------------------------------------------- - -:Manual section: 7 - -DESCRIPTION -=========== - -The extended Berkeley Packet Filter (eBPF) subsystem consists in programs -written in a pseudo-assembly language, then attached to one of the several -kernel hooks and run in reaction of specific events. This framework differs -from the older, "classic" BPF (or "cBPF") in several aspects, one of them being -the ability to call special functions (or "helpers") from within a program. -These functions are restricted to a white-list of helpers defined in the -kernel. - -These helpers are used by eBPF programs to interact with the system, or with -the context in which they work. For instance, they can be used to print -debugging messages, to get the time since the system was booted, to interact -with eBPF maps, or to manipulate network packets. Since there are several eBPF -program types, and that they do not run in the same context, each program type -can only call a subset of those helpers. - -Due to eBPF conventions, a helper can not have more than five arguments. - -Internally, eBPF programs call directly into the compiled helper functions -without requiring any foreign-function interface. As a result, calling helpers -introduces no overhead, thus offering excellent performance. - -This document is an attempt to list and document the helpers available to eBPF -developers. They are sorted by chronological order (the oldest helpers in the -kernel at the top). - -HELPERS -======= -''' - print(header) - - def print_footer(self): - footer = ''' -EXAMPLES -======== - -Example usage for most of the eBPF helpers listed in this manual page are -available within the Linux kernel sources, at the following locations: - -* *samples/bpf/* -* *tools/testing/selftests/bpf/* - -LICENSE -======= - -eBPF programs can have an associated license, passed along with the bytecode -instructions to the kernel when the programs are loaded. The format for that -string is identical to the one in use for kernel modules (Dual licenses, such -as "Dual BSD/GPL", may be used). Some helper functions are only accessible to -programs that are compatible with the GNU Privacy License (GPL). - -In order to use such helpers, the eBPF program must be loaded with the correct -license string passed (via **attr**) to the **bpf**\ () system call, and this -generally translates into the C source code of the program containing a line -similar to the following: - -:: - - char ____license[] __attribute__((section("license"), used)) = "GPL"; - -IMPLEMENTATION -============== - -This manual page is an effort to document the existing eBPF helper functions. -But as of this writing, the BPF sub-system is under heavy development. New eBPF -program or map types are added, along with new helper functions. Some helpers -are occasionally made available for additional program types. So in spite of -the efforts of the community, this page might not be up-to-date. If you want to -check by yourself what helper functions exist in your kernel, or what types of -programs they can support, here are some files among the kernel tree that you -may be interested in: - -* *include/uapi/linux/bpf.h* is the main BPF header. It contains the full list - of all helper functions, as well as many other BPF definitions including most - of the flags, structs or constants used by the helpers. -* *net/core/filter.c* contains the definition of most network-related helper - functions, and the list of program types from which they can be used. -* *kernel/trace/bpf_trace.c* is the equivalent for most tracing program-related - helpers. -* *kernel/bpf/verifier.c* contains the functions used to check that valid types - of eBPF maps are used with a given helper function. -* *kernel/bpf/* directory contains other files in which additional helpers are - defined (for cgroups, sockmaps, etc.). -* The bpftool utility can be used to probe the availability of helper functions - on the system (as well as supported program and map types, and a number of - other parameters). To do so, run **bpftool feature probe** (see - **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to - list features available to unprivileged users. - -Compatibility between helper functions and program types can generally be found -in the files where helper functions are defined. Look for the **struct -bpf_func_proto** objects and for functions returning them: these functions -contain a list of helpers that a given program type can call. Note that the -**default:** label of the **switch ... case** used to filter helpers can call -other functions, themselves allowing access to additional helpers. The -requirement for GPL license is also in those **struct bpf_func_proto**. - -Compatibility between helper functions and map types can be found in the -**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*. - -Helper functions that invalidate the checks on **data** and **data_end** -pointers for network processing are listed in function -**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*. - -SEE ALSO -======== - -**bpf**\ (2), -**bpftool**\ (8), -**cgroups**\ (7), -**ip**\ (8), -**perf_event_open**\ (2), -**sendmsg**\ (2), -**socket**\ (7), -**tc-bpf**\ (8)''' - print(footer) - - def print_proto(self, helper): - """ - Format function protocol with bold and italics markers. This makes RST - file less readable, but gives nice results in the manual page. - """ - proto = helper.proto_break_down() - - print('**%s %s%s(' % (proto['ret_type'], - proto['ret_star'].replace('*', '\\*'), - proto['name']), - end='') - - comma = '' - for a in proto['args']: - one_arg = '{}{}'.format(comma, a['type']) - if a['name']: - if a['star']: - one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*')) - else: - one_arg += '** ' - one_arg += '*{}*\\ **'.format(a['name']) - comma = ', ' - print(one_arg, end='') - - print(')**') - - def print_one(self, helper): - self.print_proto(helper) - - if (helper.desc): - print('\tDescription') - # Do not strip all newline characters: formatted code at the end of - # a section must be followed by a blank line. - for line in re.sub('\n$', '', helper.desc, count=1).split('\n'): - print('{}{}'.format('\t\t' if line else '', line)) - - if (helper.ret): - print('\tReturn') - for line in helper.ret.rstrip().split('\n'): - print('{}{}'.format('\t\t' if line else '', line)) - - print('') - -class PrinterHelpers(Printer): - """ - A printer for dumping collected information about helpers as C header to - be included from BPF program. - @helpers: array of Helper objects to print to standard output - """ - - type_fwds = [ - 'struct bpf_fib_lookup', - 'struct bpf_sk_lookup', - 'struct bpf_perf_event_data', - 'struct bpf_perf_event_value', - 'struct bpf_pidns_info', - 'struct bpf_redir_neigh', - 'struct bpf_sock', - 'struct bpf_sock_addr', - 'struct bpf_sock_ops', - 'struct bpf_sock_tuple', - 'struct bpf_spin_lock', - 'struct bpf_sysctl', - 'struct bpf_tcp_sock', - 'struct bpf_tunnel_key', - 'struct bpf_xfrm_state', - 'struct linux_binprm', - 'struct pt_regs', - 'struct sk_reuseport_md', - 'struct sockaddr', - 'struct tcphdr', - 'struct seq_file', - 'struct tcp6_sock', - 'struct tcp_sock', - 'struct tcp_timewait_sock', - 'struct tcp_request_sock', - 'struct udp6_sock', - 'struct task_struct', - - 'struct __sk_buff', - 'struct sk_msg_md', - 'struct xdp_md', - 'struct path', - 'struct btf_ptr', - 'struct inode', - 'struct socket', - 'struct file', - ] - known_types = { - '...', - 'void', - 'const void', - 'char', - 'const char', - 'int', - 'long', - 'unsigned long', - - '__be16', - '__be32', - '__wsum', - - 'struct bpf_fib_lookup', - 'struct bpf_perf_event_data', - 'struct bpf_perf_event_value', - 'struct bpf_pidns_info', - 'struct bpf_redir_neigh', - 'struct bpf_sk_lookup', - 'struct bpf_sock', - 'struct bpf_sock_addr', - 'struct bpf_sock_ops', - 'struct bpf_sock_tuple', - 'struct bpf_spin_lock', - 'struct bpf_sysctl', - 'struct bpf_tcp_sock', - 'struct bpf_tunnel_key', - 'struct bpf_xfrm_state', - 'struct linux_binprm', - 'struct pt_regs', - 'struct sk_reuseport_md', - 'struct sockaddr', - 'struct tcphdr', - 'struct seq_file', - 'struct tcp6_sock', - 'struct tcp_sock', - 'struct tcp_timewait_sock', - 'struct tcp_request_sock', - 'struct udp6_sock', - 'struct task_struct', - 'struct path', - 'struct btf_ptr', - 'struct inode', - 'struct socket', - 'struct file', - } - mapped_types = { - 'u8': '__u8', - 'u16': '__u16', - 'u32': '__u32', - 'u64': '__u64', - 's8': '__s8', - 's16': '__s16', - 's32': '__s32', - 's64': '__s64', - 'size_t': 'unsigned long', - 'struct bpf_map': 'void', - 'struct sk_buff': 'struct __sk_buff', - 'const struct sk_buff': 'const struct __sk_buff', - 'struct sk_msg_buff': 'struct sk_msg_md', - 'struct xdp_buff': 'struct xdp_md', - } - # Helpers overloaded for different context types. - overloaded_helpers = [ - 'bpf_get_socket_cookie', - 'bpf_sk_assign', - ] - - def print_header(self): - header = '''\ -/* This is auto-generated file. See bpf_helpers_doc.py for details. */ - -/* Forward declarations of BPF structs */''' - - print(header) - for fwd in self.type_fwds: - print('%s;' % fwd) - print('') - - def print_footer(self): - footer = '' - print(footer) - - def map_type(self, t): - if t in self.known_types: - return t - if t in self.mapped_types: - return self.mapped_types[t] - print("Unrecognized type '%s', please add it to known types!" % t, - file=sys.stderr) - sys.exit(1) - - seen_helpers = set() - - def print_one(self, helper): - proto = helper.proto_break_down() - - if proto['name'] in self.seen_helpers: - return - self.seen_helpers.add(proto['name']) - - print('/*') - print(" * %s" % proto['name']) - print(" *") - if (helper.desc): - # Do not strip all newline characters: formatted code at the end of - # a section must be followed by a blank line. - for line in re.sub('\n$', '', helper.desc, count=1).split('\n'): - print(' *{}{}'.format(' \t' if line else '', line)) - - if (helper.ret): - print(' *') - print(' * Returns') - for line in helper.ret.rstrip().split('\n'): - print(' *{}{}'.format(' \t' if line else '', line)) - - print(' */') - print('static %s %s(*%s)(' % (self.map_type(proto['ret_type']), - proto['ret_star'], proto['name']), end='') - comma = '' - for i, a in enumerate(proto['args']): - t = a['type'] - n = a['name'] - if proto['name'] in self.overloaded_helpers and i == 0: - t = 'void' - n = 'ctx' - one_arg = '{}{}'.format(comma, self.map_type(t)) - if n: - if a['star']: - one_arg += ' {}'.format(a['star']) - else: - one_arg += ' ' - one_arg += '{}'.format(n) - comma = ', ' - print(one_arg, end='') - - print(') = (void *) %d;' % len(self.seen_helpers)) - print('') - -############################################################################### - -# If script is launched from scripts/ from kernel tree and can access -# ../include/uapi/linux/bpf.h, use it as a default name for the file to parse, -# otherwise the --filename argument will be required from the command line. -script = os.path.abspath(sys.argv[0]) -linuxRoot = os.path.dirname(os.path.dirname(script)) -bpfh = os.path.join(linuxRoot, 'include/uapi/linux/bpf.h') - -argParser = argparse.ArgumentParser(description=""" -Parse eBPF header file and generate documentation for eBPF helper functions. -The RST-formatted output produced can be turned into a manual page with the -rst2man utility. -""") -argParser.add_argument('--header', action='store_true', - help='generate C header file') -if (os.path.isfile(bpfh)): - argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h', - default=bpfh) -else: - argParser.add_argument('--filename', help='path to include/uapi/linux/bpf.h') -args = argParser.parse_args() - -# Parse file. -headerParser = HeaderParser(args.filename) -headerParser.run() - -# Print formatted output to standard output. -if args.header: - printer = PrinterHelpers(headerParser.helpers) -else: - printer = PrinterRST(headerParser.helpers) -printer.print_all() diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers index 854d084026dd..a26599022fd6 100644 --- a/tools/bpf/Makefile.helpers +++ b/tools/bpf/Makefile.helpers @@ -35,7 +35,7 @@ man7: $(DOC_MAN7) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) $(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h - $(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@ + $(QUIET_GEN)$(UP2DIR)../../scripts/bpf_doc.py --filename $< > $@ $(OUTPUT)%.7: $(OUTPUT)%.rst ifndef RST2MAN_DEP diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b89af20cfa19..b4c5c529ad17 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -729,7 +729,7 @@ union bpf_attr { * parsed and used to produce a manual page. The workflow is the following, * and requires the rst2man utility: * - * $ ./scripts/bpf_helpers_doc.py \ + * $ ./scripts/bpf_doc.py \ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 * $ man /tmp/bpf-helpers.7 diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 887a494ad5fc..8170f88e8ea6 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -158,7 +158,7 @@ $(BPF_IN_STATIC): force $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h - $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ + $(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS) $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 5d7b947320fb..f05c4d48fd7e 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -20,4 +20,4 @@ tools/lib/bitmap.c tools/lib/str_error_r.c tools/lib/vsprintf.c tools/lib/zalloc.c -scripts/bpf_helpers_doc.py +scripts/bpf_doc.py -- cgit v1.2.3-59-g8ed1b From 7c32e8f8bc33a5f4b113a630857e46634e3e143b Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Wed, 3 Mar 2021 10:18:13 +0000 Subject: bpf: Add PROG_TEST_RUN support for sk_lookup programs Allow to pass sk_lookup programs to PROG_TEST_RUN. User space provides the full bpf_sk_lookup struct as context. Since the context includes a socket pointer that can't be exposed to user space we define that PROG_TEST_RUN returns the cookie of the selected socket or zero in place of the socket pointer. We don't support testing programs that select a reuseport socket, since this would mean running another (unrelated) BPF program from the sk_lookup test handler. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210303101816.36774-3-lmb@cloudflare.com --- include/linux/bpf.h | 10 ++++ include/uapi/linux/bpf.h | 5 +- net/bpf/test_run.c | 105 +++++++++++++++++++++++++++++++++++++++++ net/core/filter.c | 1 + tools/include/uapi/linux/bpf.h | 5 +- 5 files changed, 124 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4c730863fa77..c931bc97019d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1491,6 +1491,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@ -1692,6 +1695,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, return -ENOTSUPP; } +static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + static inline void bpf_map_put(struct bpf_map *map) { } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 63a56ed6a785..7f530e349aff 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5953,7 +5953,10 @@ struct bpf_pidns_info { /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + }; __u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index eb3c78cd4d7c..0abdd67f44b1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -10,8 +10,10 @@ #include #include #include +#include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -781,3 +783,106 @@ out: kfree(data); return ret; } + +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + struct bpf_test_timer t = { NO_PREEMPT }; + struct bpf_prog_array *progs = NULL; + struct bpf_sk_lookup_kern ctx = {}; + u32 repeat = kattr->test.repeat; + struct bpf_sk_lookup *user_ctx; + u32 retval, duration; + int ret = -EINVAL; + + if (prog->type != BPF_PROG_TYPE_SK_LOOKUP) + return -EINVAL; + + if (kattr->test.flags || kattr->test.cpu) + return -EINVAL; + + if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out || + kattr->test.data_size_out) + return -EINVAL; + + if (!repeat) + repeat = 1; + + user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx)); + if (IS_ERR(user_ctx)) + return PTR_ERR(user_ctx); + + if (!user_ctx) + return -EINVAL; + + if (user_ctx->sk) + goto out; + + if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) + goto out; + + if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { + ret = -ERANGE; + goto out; + } + + ctx.family = (u16)user_ctx->family; + ctx.protocol = (u16)user_ctx->protocol; + ctx.dport = (u16)user_ctx->local_port; + ctx.sport = (__force __be16)user_ctx->remote_port; + + switch (ctx.family) { + case AF_INET: + ctx.v4.daddr = (__force __be32)user_ctx->local_ip4; + ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4; + break; + +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6; + ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6; + break; +#endif + + default: + ret = -EAFNOSUPPORT; + goto out; + } + + progs = bpf_prog_array_alloc(1, GFP_KERNEL); + if (!progs) { + ret = -ENOMEM; + goto out; + } + + progs->items[0].prog = prog; + + bpf_test_timer_enter(&t); + do { + ctx.selected_sk = NULL; + retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN); + } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); + bpf_test_timer_leave(&t); + + if (ret < 0) + goto out; + + user_ctx->cookie = 0; + if (ctx.selected_sk) { + if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) { + ret = -EOPNOTSUPP; + goto out; + } + + user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); + } + + ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); + +out: + bpf_prog_array_free(progs); + kfree(user_ctx); + return ret; +} diff --git a/net/core/filter.c b/net/core/filter.c index 13bcf248ee7b..a526db494c62 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10457,6 +10457,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, } const struct bpf_prog_ops sk_lookup_prog_ops = { + .test_run = bpf_prog_test_run_sk_lookup, }; const struct bpf_verifier_ops sk_lookup_verifier_ops = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 63a56ed6a785..7f530e349aff 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5953,7 +5953,10 @@ struct bpf_pidns_info { /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + }; __u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ -- cgit v1.2.3-59-g8ed1b From d01b59c9ae94560fbcceaafeef39784d72765033 Mon Sep 17 00:00:00 2001 From: Xuesen Huang Date: Thu, 4 Mar 2021 14:40:46 +0800 Subject: bpf: Add bpf_skb_adjust_room flag BPF_F_ADJ_ROOM_ENCAP_L2_ETH bpf_skb_adjust_room sets the inner_protocol as skb->protocol for packets encapsulation. But that is not appropriate when pushing Ethernet header. Add an option to further specify encap L2 type and set the inner_protocol as ETH_P_TEB. Suggested-by: Willem de Bruijn Signed-off-by: Xuesen Huang Signed-off-by: Zhiyong Cheng Signed-off-by: Li Wang Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Link: https://lore.kernel.org/bpf/20210304064046.6232-1-hxseverything@gmail.com --- include/uapi/linux/bpf.h | 5 +++++ net/core/filter.c | 11 ++++++++++- tools/include/uapi/linux/bpf.h | 5 +++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/bpf.h') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7f530e349aff..2d3036e292a9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2484,6 +2484,10 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4916,6 +4920,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index a526db494c62..588b19ba0da8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3409,6 +3409,7 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb) BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ + BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK)) @@ -3445,6 +3446,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL; + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY; @@ -3463,7 +3468,11 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_set_network_header(skb, mac_len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7f530e349aff..2d3036e292a9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2484,6 +2484,10 @@ union bpf_attr { * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@ -4916,6 +4920,7 @@ enum { BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), }; enum { -- cgit v1.2.3-59-g8ed1b