diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/arraymap.c | 1 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 393 | ||||
-rw-r--r-- | kernel/bpf/cpumap.c | 2 | ||||
-rw-r--r-- | kernel/bpf/local_storage.c | 84 | ||||
-rw-r--r-- | kernel/bpf/lpm_trie.c | 1 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 80 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 241 | ||||
-rw-r--r-- | kernel/module.c | 5 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 99 |
9 files changed, 804 insertions, 102 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 24583da9ffd1..25632a75d630 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -382,6 +382,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, } static int array_map_check_btf(const struct bpf_map *map, + const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index bf34933cc413..715f9fcf4712 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -164,7 +164,7 @@ #define BITS_ROUNDUP_BYTES(bits) \ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits)) -#define BTF_INFO_MASK 0x0f00ffff +#define BTF_INFO_MASK 0x8f00ffff #define BTF_INT_MASK 0x0fffffff #define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE) #define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET) @@ -274,6 +274,10 @@ struct btf_kind_operations { const struct btf_type *struct_type, const struct btf_member *member, const struct btf_type *member_type); + int (*check_kflag_member)(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type); void (*log_details)(struct btf_verifier_env *env, const struct btf_type *t); void (*seq_show)(const struct btf *btf, const struct btf_type *t, @@ -419,6 +423,25 @@ static u16 btf_type_vlen(const struct btf_type *t) return BTF_INFO_VLEN(t->info); } +static bool btf_type_kflag(const struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info); +} + +static u32 btf_member_bit_offset(const struct btf_type *struct_type, + const struct btf_member *member) +{ + return btf_type_kflag(struct_type) ? BTF_MEMBER_BIT_OFFSET(member->offset) + : member->offset; +} + +static u32 btf_member_bitfield_size(const struct btf_type *struct_type, + const struct btf_member *member) +{ + return btf_type_kflag(struct_type) ? BTF_MEMBER_BITFIELD_SIZE(member->offset) + : 0; +} + static u32 btf_type_int(const struct btf_type *t) { return *(u32 *)(t + 1); @@ -474,7 +497,7 @@ static bool btf_name_valid_identifier(const struct btf *btf, u32 offset) return !*src; } -const char *btf_name_by_offset(const struct btf *btf, u32 offset) +static const char *__btf_name_by_offset(const struct btf *btf, u32 offset) { if (!offset) return "(anon)"; @@ -484,6 +507,14 @@ const char *btf_name_by_offset(const struct btf *btf, u32 offset) return "(invalid-name-offset)"; } +const char *btf_name_by_offset(const struct btf *btf, u32 offset) +{ + if (offset < btf->hdr.str_len) + return &btf->strings[offset]; + + return NULL; +} + const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) { if (type_id > btf->nr_types) @@ -514,6 +545,47 @@ static bool btf_type_int_is_regular(const struct btf_type *t) return true; } +/* + * Check that given struct member is a regular int with expected + * offset and size. + */ +bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, + const struct btf_member *m, + u32 expected_offset, u32 expected_size) +{ + const struct btf_type *t; + u32 id, int_data; + u8 nr_bits; + + id = m->type; + t = btf_type_id_size(btf, &id, NULL); + if (!t || !btf_type_is_int(t)) + return false; + + int_data = btf_type_int(t); + nr_bits = BTF_INT_BITS(int_data); + if (btf_type_kflag(s)) { + u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset); + u32 bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset); + + /* if kflag set, int should be a regular int and + * bit offset should be at byte boundary. + */ + return !bitfield_size && + BITS_ROUNDUP_BYTES(bit_offset) == expected_offset && + BITS_ROUNDUP_BYTES(nr_bits) == expected_size; + } + + if (BTF_INT_OFFSET(int_data) || + BITS_PER_BYTE_MASKED(m->offset) || + BITS_ROUNDUP_BYTES(m->offset) != expected_offset || + BITS_PER_BYTE_MASKED(nr_bits) || + BITS_ROUNDUP_BYTES(nr_bits) != expected_size) + return false; + + return true; +} + __printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log, const char *fmt, ...) { @@ -554,7 +626,7 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, __btf_verifier_log(log, "[%u] %s %s%s", env->log_type_id, btf_kind_str[kind], - btf_name_by_offset(btf, t->name_off), + __btf_name_by_offset(btf, t->name_off), log_details ? " " : ""); if (log_details) @@ -597,9 +669,17 @@ static void btf_verifier_log_member(struct btf_verifier_env *env, if (env->phase != CHECK_META) btf_verifier_log_type(env, struct_type, NULL); - __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u", - btf_name_by_offset(btf, member->name_off), - member->type, member->offset); + if (btf_type_kflag(struct_type)) + __btf_verifier_log(log, + "\t%s type_id=%u bitfield_size=%u bits_offset=%u", + __btf_name_by_offset(btf, member->name_off), + member->type, + BTF_MEMBER_BITFIELD_SIZE(member->offset), + BTF_MEMBER_BIT_OFFSET(member->offset)); + else + __btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u", + __btf_name_by_offset(btf, member->name_off), + member->type, member->offset); if (fmt && *fmt) { __btf_verifier_log(log, " "); @@ -915,6 +995,38 @@ static int btf_df_check_member(struct btf_verifier_env *env, return -EINVAL; } +static int btf_df_check_kflag_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + btf_verifier_log_basic(env, struct_type, + "Unsupported check_kflag_member"); + return -EINVAL; +} + +/* Used for ptr, array and struct/union type members. + * int, enum and modifier types have their specific callback functions. + */ +static int btf_generic_check_kflag_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + if (BTF_MEMBER_BITFIELD_SIZE(member->offset)) { + btf_verifier_log_member(env, struct_type, member, + "Invalid member bitfield_size"); + return -EINVAL; + } + + /* bitfield size is 0, so member->offset represents bit offset only. + * It is safe to call non kflag check_member variants. + */ + return btf_type_ops(member_type)->check_member(env, struct_type, + member, + member_type); +} + static int btf_df_resolve(struct btf_verifier_env *env, const struct resolve_vertex *v) { @@ -967,6 +1079,62 @@ static int btf_int_check_member(struct btf_verifier_env *env, return 0; } +static int btf_int_check_kflag_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u32 struct_bits_off, nr_bits, nr_int_data_bits, bytes_offset; + u32 int_data = btf_type_int(member_type); + u32 struct_size = struct_type->size; + u32 nr_copy_bits; + + /* a regular int type is required for the kflag int member */ + if (!btf_type_int_is_regular(member_type)) { + btf_verifier_log_member(env, struct_type, member, + "Invalid member base type"); + return -EINVAL; + } + + /* check sanity of bitfield size */ + nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); + struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); + nr_int_data_bits = BTF_INT_BITS(int_data); + if (!nr_bits) { + /* Not a bitfield member, member offset must be at byte + * boundary. + */ + if (BITS_PER_BYTE_MASKED(struct_bits_off)) { + btf_verifier_log_member(env, struct_type, member, + "Invalid member offset"); + return -EINVAL; + } + + nr_bits = nr_int_data_bits; + } else if (nr_bits > nr_int_data_bits) { + btf_verifier_log_member(env, struct_type, member, + "Invalid member bitfield_size"); + return -EINVAL; + } + + bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); + nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); + if (nr_copy_bits > BITS_PER_U64) { + btf_verifier_log_member(env, struct_type, member, + "nr_copy_bits exceeds 64"); + return -EINVAL; + } + + if (struct_size < bytes_offset || + struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) { + btf_verifier_log_member(env, struct_type, member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + static s32 btf_int_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) @@ -986,6 +1154,11 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + int_data = btf_type_int(t); if (int_data & ~BTF_INT_MASK) { btf_verifier_log_basic(env, t, "Invalid int_data:%x", @@ -1038,26 +1211,16 @@ static void btf_int_log(struct btf_verifier_env *env, btf_int_encoding_str(BTF_INT_ENCODING(int_data))); } -static void btf_int_bits_seq_show(const struct btf *btf, - const struct btf_type *t, - void *data, u8 bits_offset, - struct seq_file *m) +static void btf_bitfield_seq_show(void *data, u8 bits_offset, + u8 nr_bits, struct seq_file *m) { u16 left_shift_bits, right_shift_bits; - u32 int_data = btf_type_int(t); - u8 nr_bits = BTF_INT_BITS(int_data); - u8 total_bits_offset; u8 nr_copy_bytes; u8 nr_copy_bits; u64 print_num; - /* - * bits_offset is at most 7. - * BTF_INT_OFFSET() cannot exceed 64 bits. - */ - total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); - data += BITS_ROUNDDOWN_BYTES(total_bits_offset); - bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + data += BITS_ROUNDDOWN_BYTES(bits_offset); + bits_offset = BITS_PER_BYTE_MASKED(bits_offset); nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); @@ -1077,6 +1240,24 @@ static void btf_int_bits_seq_show(const struct btf *btf, seq_printf(m, "0x%llx", print_num); } + +static void btf_int_bits_seq_show(const struct btf *btf, + const struct btf_type *t, + void *data, u8 bits_offset, + struct seq_file *m) +{ + u32 int_data = btf_type_int(t); + u8 nr_bits = BTF_INT_BITS(int_data); + u8 total_bits_offset; + + /* + * bits_offset is at most 7. + * BTF_INT_OFFSET() cannot exceed 64 bits. + */ + total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); + btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m); +} + static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct seq_file *m) @@ -1126,6 +1307,7 @@ static const struct btf_kind_operations int_ops = { .check_meta = btf_int_check_meta, .resolve = btf_df_resolve, .check_member = btf_int_check_member, + .check_kflag_member = btf_int_check_kflag_member, .log_details = btf_int_log, .seq_show = btf_int_seq_show, }; @@ -1155,6 +1337,31 @@ static int btf_modifier_check_member(struct btf_verifier_env *env, resolved_type); } +static int btf_modifier_check_kflag_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + const struct btf_type *resolved_type; + u32 resolved_type_id = member->type; + struct btf_member resolved_member; + struct btf *btf = env->btf; + + resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL); + if (!resolved_type) { + btf_verifier_log_member(env, struct_type, member, + "Invalid member"); + return -EINVAL; + } + + resolved_member = *member; + resolved_member.type = resolved_type_id; + + return btf_type_ops(resolved_type)->check_kflag_member(env, struct_type, + &resolved_member, + resolved_type); +} + static int btf_ptr_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, @@ -1190,6 +1397,11 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + if (!BTF_TYPE_ID_VALID(t->type)) { btf_verifier_log_type(env, t, "Invalid type_id"); return -EINVAL; @@ -1343,6 +1555,7 @@ static struct btf_kind_operations modifier_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_modifier_resolve, .check_member = btf_modifier_check_member, + .check_kflag_member = btf_modifier_check_kflag_member, .log_details = btf_ref_type_log, .seq_show = btf_modifier_seq_show, }; @@ -1351,6 +1564,7 @@ static struct btf_kind_operations ptr_ops = { .check_meta = btf_ref_type_check_meta, .resolve = btf_ptr_resolve, .check_member = btf_ptr_check_member, + .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_ref_type_log, .seq_show = btf_ptr_seq_show, }; @@ -1381,11 +1595,18 @@ static s32 btf_fwd_check_meta(struct btf_verifier_env *env, return 0; } +static void btf_fwd_type_log(struct btf_verifier_env *env, + const struct btf_type *t) +{ + btf_verifier_log(env, "%s", btf_type_kflag(t) ? "union" : "struct"); +} + static struct btf_kind_operations fwd_ops = { .check_meta = btf_fwd_check_meta, .resolve = btf_df_resolve, .check_member = btf_df_check_member, - .log_details = btf_ref_type_log, + .check_kflag_member = btf_df_check_kflag_member, + .log_details = btf_fwd_type_log, .seq_show = btf_df_seq_show, }; @@ -1443,6 +1664,11 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + if (t->size) { btf_verifier_log_type(env, t, "size != 0"); return -EINVAL; @@ -1566,6 +1792,7 @@ static struct btf_kind_operations array_ops = { .check_meta = btf_array_check_meta, .resolve = btf_array_resolve, .check_member = btf_array_check_member, + .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_array_log, .seq_show = btf_array_seq_show, }; @@ -1604,6 +1831,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, u32 meta_needed, last_offset; struct btf *btf = env->btf; u32 struct_size = t->size; + u32 offset; u16 i; meta_needed = btf_type_vlen(t) * sizeof(*member); @@ -1645,7 +1873,8 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, return -EINVAL; } - if (is_union && member->offset) { + offset = btf_member_bit_offset(t, member); + if (is_union && offset) { btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); return -EINVAL; @@ -1655,20 +1884,20 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env, * ">" instead of ">=" because the last member could be * "char a[0];" */ - if (last_offset > member->offset) { + if (last_offset > offset) { btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); return -EINVAL; } - if (BITS_ROUNDUP_BYTES(member->offset) > struct_size) { + if (BITS_ROUNDUP_BYTES(offset) > struct_size) { btf_verifier_log_member(env, t, member, "Member bits_offset exceeds its struct size"); return -EINVAL; } btf_verifier_log_member(env, t, member, NULL); - last_offset = member->offset; + last_offset = offset; } return meta_needed; @@ -1698,9 +1927,14 @@ static int btf_struct_resolve(struct btf_verifier_env *env, last_member_type = btf_type_by_id(env->btf, last_member_type_id); - err = btf_type_ops(last_member_type)->check_member(env, v->t, - last_member, - last_member_type); + if (btf_type_kflag(v->t)) + err = btf_type_ops(last_member_type)->check_kflag_member(env, v->t, + last_member, + last_member_type); + else + err = btf_type_ops(last_member_type)->check_member(env, v->t, + last_member, + last_member_type); if (err) return err; } @@ -1722,9 +1956,14 @@ static int btf_struct_resolve(struct btf_verifier_env *env, return env_stack_push(env, member_type, member_type_id); } - err = btf_type_ops(member_type)->check_member(env, v->t, - member, - member_type); + if (btf_type_kflag(v->t)) + err = btf_type_ops(member_type)->check_kflag_member(env, v->t, + member, + member_type); + else + err = btf_type_ops(member_type)->check_member(env, v->t, + member, + member_type); if (err) return err; } @@ -1752,17 +1991,26 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, for_each_member(i, t, member) { const struct btf_type *member_type = btf_type_by_id(btf, member->type); - u32 member_offset = member->offset; - u32 bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); - u8 bits8_offset = BITS_PER_BYTE_MASKED(member_offset); const struct btf_kind_operations *ops; + u32 member_offset, bitfield_size; + u32 bytes_offset; + u8 bits8_offset; if (i) seq_puts(m, seq); - ops = btf_type_ops(member_type); - ops->seq_show(btf, member_type, member->type, - data + bytes_offset, bits8_offset, m); + member_offset = btf_member_bit_offset(t, member); + bitfield_size = btf_member_bitfield_size(t, member); + if (bitfield_size) { + btf_bitfield_seq_show(data, member_offset, + bitfield_size, m); + } else { + bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); + bits8_offset = BITS_PER_BYTE_MASKED(member_offset); + ops = btf_type_ops(member_type); + ops->seq_show(btf, member_type, member->type, + data + bytes_offset, bits8_offset, m); + } } seq_puts(m, "}"); } @@ -1771,6 +2019,7 @@ static struct btf_kind_operations struct_ops = { .check_meta = btf_struct_check_meta, .resolve = btf_struct_resolve, .check_member = btf_struct_check_member, + .check_kflag_member = btf_generic_check_kflag_member, .log_details = btf_struct_log, .seq_show = btf_struct_seq_show, }; @@ -1800,6 +2049,41 @@ static int btf_enum_check_member(struct btf_verifier_env *env, return 0; } +static int btf_enum_check_kflag_member(struct btf_verifier_env *env, + const struct btf_type *struct_type, + const struct btf_member *member, + const struct btf_type *member_type) +{ + u32 struct_bits_off, nr_bits, bytes_end, struct_size; + u32 int_bitsize = sizeof(int) * BITS_PER_BYTE; + + struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset); + nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset); + if (!nr_bits) { + if (BITS_PER_BYTE_MASKED(struct_bits_off)) { + btf_verifier_log_member(env, struct_type, member, + "Member is not byte aligned"); + return -EINVAL; + } + + nr_bits = int_bitsize; + } else if (nr_bits > int_bitsize) { + btf_verifier_log_member(env, struct_type, member, + "Invalid member bitfield_size"); + return -EINVAL; + } + + struct_size = struct_type->size; + bytes_end = BITS_ROUNDUP_BYTES(struct_bits_off + nr_bits); + if (struct_size < bytes_end) { + btf_verifier_log_member(env, struct_type, member, + "Member exceeds struct_size"); + return -EINVAL; + } + + return 0; +} + static s32 btf_enum_check_meta(struct btf_verifier_env *env, const struct btf_type *t, u32 meta_left) @@ -1819,6 +2103,11 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + if (t->size != sizeof(int)) { btf_verifier_log_type(env, t, "Expected size:%zu", sizeof(int)); @@ -1850,7 +2139,7 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, btf_verifier_log(env, "\t%s val=%d\n", - btf_name_by_offset(btf, enums[i].name_off), + __btf_name_by_offset(btf, enums[i].name_off), enums[i].val); } @@ -1874,7 +2163,8 @@ static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t, for (i = 0; i < nr_enums; i++) { if (v == enums[i].val) { seq_printf(m, "%s", - btf_name_by_offset(btf, enums[i].name_off)); + __btf_name_by_offset(btf, + enums[i].name_off)); return; } } @@ -1886,6 +2176,7 @@ static struct btf_kind_operations enum_ops = { .check_meta = btf_enum_check_meta, .resolve = btf_df_resolve, .check_member = btf_enum_check_member, + .check_kflag_member = btf_enum_check_kflag_member, .log_details = btf_enum_log, .seq_show = btf_enum_seq_show, }; @@ -1908,6 +2199,11 @@ static s32 btf_func_proto_check_meta(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + btf_verifier_log_type(env, t, NULL); return meta_needed; @@ -1932,20 +2228,20 @@ static void btf_func_proto_log(struct btf_verifier_env *env, } btf_verifier_log(env, "%u %s", args[0].type, - btf_name_by_offset(env->btf, - args[0].name_off)); + __btf_name_by_offset(env->btf, + args[0].name_off)); for (i = 1; i < nr_args - 1; i++) btf_verifier_log(env, ", %u %s", args[i].type, - btf_name_by_offset(env->btf, - args[i].name_off)); + __btf_name_by_offset(env->btf, + args[i].name_off)); if (nr_args > 1) { const struct btf_param *last_arg = &args[nr_args - 1]; if (last_arg->type) btf_verifier_log(env, ", %u %s", last_arg->type, - btf_name_by_offset(env->btf, - last_arg->name_off)); + __btf_name_by_offset(env->btf, + last_arg->name_off)); else btf_verifier_log(env, ", vararg"); } @@ -1967,6 +2263,7 @@ static struct btf_kind_operations func_proto_ops = { * Hence, there is no btf_func_check_member(). */ .check_member = btf_df_check_member, + .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_func_proto_log, .seq_show = btf_df_seq_show, }; @@ -1986,6 +2283,11 @@ static s32 btf_func_check_meta(struct btf_verifier_env *env, return -EINVAL; } + if (btf_type_kflag(t)) { + btf_verifier_log_type(env, t, "Invalid btf_info kind_flag"); + return -EINVAL; + } + btf_verifier_log_type(env, t, NULL); return 0; @@ -1995,6 +2297,7 @@ static struct btf_kind_operations func_ops = { .check_meta = btf_func_check_meta, .resolve = btf_df_resolve, .check_member = btf_df_check_member, + .check_kflag_member = btf_df_check_kflag_member, .log_details = btf_ref_type_log, .seq_show = btf_df_seq_show, }; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 24aac0d0f412..8974b3755670 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -183,7 +183,7 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, * is not at a fixed memory location, with mixed length * packets, which is bad for cache-line hotness. */ - frame_size = SKB_DATA_ALIGN(xdpf->len) + xdpf->headroom + + frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); pkt_data_start = xdpf->data - xdpf->headroom; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index b65017dead44..07a34ef562a0 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -1,11 +1,13 @@ //SPDX-License-Identifier: GPL-2.0 #include <linux/bpf-cgroup.h> #include <linux/bpf.h> +#include <linux/btf.h> #include <linux/bug.h> #include <linux/filter.h> #include <linux/mm.h> #include <linux/rbtree.h> #include <linux/slab.h> +#include <uapi/linux/btf.h> DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); @@ -308,6 +310,85 @@ static int cgroup_storage_delete_elem(struct bpf_map *map, void *key) return -EINVAL; } +static int cgroup_storage_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + struct btf_member *m; + u32 offset, size; + + /* Key is expected to be of struct bpf_cgroup_storage_key type, + * which is: + * struct bpf_cgroup_storage_key { + * __u64 cgroup_inode_id; + * __u32 attach_type; + * }; + */ + + /* + * Key_type must be a structure with two fields. + */ + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(key_type->info) != 2) + return -EINVAL; + + /* + * The first field must be a 64 bit integer at 0 offset. + */ + m = (struct btf_member *)(key_type + 1); + size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id); + if (!btf_member_is_reg_int(btf, key_type, m, 0, size)) + return -EINVAL; + + /* + * The second field must be a 32 bit integer at 64 bit offset. + */ + m++; + offset = offsetof(struct bpf_cgroup_storage_key, attach_type); + size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type); + if (!btf_member_is_reg_int(btf, key_type, m, offset, size)) + return -EINVAL; + + return 0; +} + +static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *_key, + struct seq_file *m) +{ + enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); + struct bpf_cgroup_storage_key *key = _key; + struct bpf_cgroup_storage *storage; + int cpu; + + rcu_read_lock(); + storage = cgroup_storage_lookup(map_to_storage(map), key, false); + if (!storage) { + rcu_read_unlock(); + return; + } + + btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); + stype = cgroup_storage_type(map); + if (stype == BPF_CGROUP_STORAGE_SHARED) { + seq_puts(m, ": "); + btf_type_seq_show(map->btf, map->btf_value_type_id, + &READ_ONCE(storage->buf)->data[0], m); + seq_puts(m, "\n"); + } else { + seq_puts(m, ": {\n"); + for_each_possible_cpu(cpu) { + seq_printf(m, "\tcpu%d: ", cpu); + btf_type_seq_show(map->btf, map->btf_value_type_id, + per_cpu_ptr(storage->percpu_buf, cpu), + m); + seq_puts(m, "\n"); + } + seq_puts(m, "}\n"); + } + rcu_read_unlock(); +} + const struct bpf_map_ops cgroup_storage_map_ops = { .map_alloc = cgroup_storage_map_alloc, .map_free = cgroup_storage_map_free, @@ -315,7 +396,8 @@ const struct bpf_map_ops cgroup_storage_map_ops = { .map_lookup_elem = cgroup_storage_lookup_elem, .map_update_elem = cgroup_storage_update_elem, .map_delete_elem = cgroup_storage_delete_elem, - .map_check_btf = map_check_no_btf, + .map_check_btf = cgroup_storage_check_btf, + .map_seq_show_elem = cgroup_storage_seq_show_elem, }; int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index bfd4882e1106..abf1002080df 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -728,6 +728,7 @@ free_stack: } static int trie_check_btf(const struct bpf_map *map, + const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5745c7837621..0607db304def 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -456,6 +456,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) } int map_check_no_btf(const struct bpf_map *map, + const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { @@ -478,7 +479,7 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf, return -EINVAL; if (map->ops->map_check_btf) - ret = map->ops->map_check_btf(map, key_type, value_type); + ret = map->ops->map_check_btf(map, btf, key_type, value_type); return ret; } @@ -1472,11 +1473,6 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS) return -E2BIG; - - if (type == BPF_PROG_TYPE_KPROBE && - attr->kern_version != LINUX_VERSION_CODE) - return -EINVAL; - if (type != BPF_PROG_TYPE_SOCKET_FILTER && type != BPF_PROG_TYPE_CGROUP_SKB && !capable(CAP_SYS_ADMIN)) @@ -1608,6 +1604,7 @@ static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp) bpf_probe_unregister(raw_tp->btp, raw_tp->prog); bpf_prog_put(raw_tp->prog); } + bpf_put_raw_tracepoint(raw_tp->btp); kfree(raw_tp); return 0; } @@ -1633,13 +1630,15 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) return -EFAULT; tp_name[sizeof(tp_name) - 1] = 0; - btp = bpf_find_raw_tracepoint(tp_name); + btp = bpf_get_raw_tracepoint(tp_name); if (!btp) return -ENOENT; raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); - if (!raw_tp) - return -ENOMEM; + if (!raw_tp) { + err = -ENOMEM; + goto out_put_btp; + } raw_tp->btp = btp; prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd, @@ -1667,6 +1666,8 @@ out_put_prog: bpf_prog_put(prog); out_free_tp: kfree(raw_tp); +out_put_btp: + bpf_put_raw_tracepoint(btp); return err; } @@ -2031,13 +2032,6 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog) insns[i + 1].imm = 0; continue; } - - if (!bpf_dump_raw_ok() && - imm == (unsigned long)prog->aux) { - insns[i].imm = 0; - insns[i + 1].imm = 0; - continue; - } } return insns; @@ -2271,33 +2265,25 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ulen = info.nr_func_info; info.nr_func_info = prog->aux->func_info_cnt; if (info.nr_func_info && ulen) { - if (bpf_dump_raw_ok()) { - char __user *user_finfo; + char __user *user_finfo; - user_finfo = u64_to_user_ptr(info.func_info); - ulen = min_t(u32, info.nr_func_info, ulen); - if (copy_to_user(user_finfo, prog->aux->func_info, - info.func_info_rec_size * ulen)) - return -EFAULT; - } else { - info.func_info = 0; - } + user_finfo = u64_to_user_ptr(info.func_info); + ulen = min_t(u32, info.nr_func_info, ulen); + if (copy_to_user(user_finfo, prog->aux->func_info, + info.func_info_rec_size * ulen)) + return -EFAULT; } ulen = info.nr_line_info; info.nr_line_info = prog->aux->nr_linfo; if (info.nr_line_info && ulen) { - if (bpf_dump_raw_ok()) { - __u8 __user *user_linfo; + __u8 __user *user_linfo; - user_linfo = u64_to_user_ptr(info.line_info); - ulen = min_t(u32, info.nr_line_info, ulen); - if (copy_to_user(user_linfo, prog->aux->linfo, - info.line_info_rec_size * ulen)) - return -EFAULT; - } else { - info.line_info = 0; - } + user_linfo = u64_to_user_ptr(info.line_info); + ulen = min_t(u32, info.nr_line_info, ulen); + if (copy_to_user(user_linfo, prog->aux->linfo, + info.line_info_rec_size * ulen)) + return -EFAULT; } ulen = info.nr_jited_line_info; @@ -2322,6 +2308,28 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } } + ulen = info.nr_prog_tags; + info.nr_prog_tags = prog->aux->func_cnt ? : 1; + if (ulen) { + __u8 __user (*user_prog_tags)[BPF_TAG_SIZE]; + u32 i; + + user_prog_tags = u64_to_user_ptr(info.prog_tags); + ulen = min_t(u32, info.nr_prog_tags, ulen); + if (prog->aux->func_cnt) { + for (i = 0; i < ulen; i++) { + if (copy_to_user(user_prog_tags[i], + prog->aux->func[i]->tag, + BPF_TAG_SIZE)) + return -EFAULT; + } + } else { + if (copy_to_user(user_prog_tags[0], + prog->tag, BPF_TAG_SIZE)) + return -EFAULT; + } + } + done: if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5b3c0a95244f..71d86e3024ae 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -26,6 +26,7 @@ #include <linux/bsearch.h> #include <linux/sort.h> #include <linux/perf_event.h> +#include <linux/ctype.h> #include "disasm.h" @@ -216,6 +217,27 @@ struct bpf_call_arg_meta { static DEFINE_MUTEX(bpf_verifier_lock); +static const struct bpf_line_info * +find_linfo(const struct bpf_verifier_env *env, u32 insn_off) +{ + const struct bpf_line_info *linfo; + const struct bpf_prog *prog; + u32 i, nr_linfo; + + prog = env->prog; + nr_linfo = prog->aux->nr_linfo; + + if (!nr_linfo || insn_off >= prog->len) + return NULL; + + linfo = prog->aux->linfo; + for (i = 1; i < nr_linfo; i++) + if (insn_off < linfo[i].insn_off) + break; + + return &linfo[i - 1]; +} + void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args) { @@ -266,6 +288,42 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) va_end(args); } +static const char *ltrim(const char *s) +{ + while (isspace(*s)) + s++; + + return s; +} + +__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, + u32 insn_off, + const char *prefix_fmt, ...) +{ + const struct bpf_line_info *linfo; + + if (!bpf_verifier_log_needed(&env->log)) + return; + + linfo = find_linfo(env, insn_off); + if (!linfo || linfo == env->prev_linfo) + return; + + if (prefix_fmt) { + va_list args; + + va_start(args, prefix_fmt); + bpf_verifier_vlog(&env->log, prefix_fmt, args); + va_end(args); + } + + verbose(env, "%s\n", + ltrim(btf_name_by_offset(env->prog->aux->btf, + linfo->line_off))); + + env->prev_linfo = linfo; +} + static bool type_is_pkt_pointer(enum bpf_reg_type type) { return type == PTR_TO_PACKET || @@ -339,12 +397,14 @@ static char slot_type_char[] = { static void print_liveness(struct bpf_verifier_env *env, enum bpf_reg_liveness live) { - if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN)) + if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) verbose(env, "_"); if (live & REG_LIVE_READ) verbose(env, "r"); if (live & REG_LIVE_WRITTEN) verbose(env, "w"); + if (live & REG_LIVE_DONE) + verbose(env, "D"); } static struct bpf_func_state *func(struct bpf_verifier_env *env, @@ -1074,6 +1134,12 @@ static int mark_reg_read(struct bpf_verifier_env *env, /* if read wasn't screened by an earlier write ... */ if (writes && state->live & REG_LIVE_WRITTEN) break; + if (parent->live & REG_LIVE_DONE) { + verbose(env, "verifier BUG type %s var_off %lld off %d\n", + reg_type_str[parent->type], + parent->var_off.value, parent->off); + return -EFAULT; + } /* ... then we depend on parent's value */ parent->live |= REG_LIVE_READ; state = parent; @@ -1220,6 +1286,10 @@ static int check_stack_write(struct bpf_verifier_env *env, /* regular write of data into stack destroys any spilled ptr */ state->stack[spi].spilled_ptr.type = NOT_INIT; + /* Mark slots as STACK_MISC if they belonged to spilled ptr. */ + if (state->stack[spi].slot_type[0] == STACK_SPILL) + for (i = 0; i < BPF_REG_SIZE; i++) + state->stack[spi].slot_type[i] = STACK_MISC; /* only mark the slot as written if all 8 bytes were written * otherwise read propagation may incorrectly stop too soon @@ -1237,6 +1307,7 @@ static int check_stack_write(struct bpf_verifier_env *env, register_is_null(&cur->regs[value_regno])) type = STACK_ZERO; + /* Mark slots affected by this stack write. */ for (i = 0; i < size; i++) state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type; @@ -3788,6 +3859,12 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode) if (tnum_is_const(reg->var_off)) return !tnum_equals_const(reg->var_off, val); break; + case BPF_JSET: + if ((~reg->var_off.mask & reg->var_off.value) & val) + return 1; + if (!((reg->var_off.mask | reg->var_off.value) & val)) + return 0; + break; case BPF_JGT: if (reg->umin_value > val) return 1; @@ -3872,6 +3949,13 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg, */ __mark_reg_known(false_reg, val); break; + case BPF_JSET: + false_reg->var_off = tnum_and(false_reg->var_off, + tnum_const(~val)); + if (is_power_of_2(val)) + true_reg->var_off = tnum_or(true_reg->var_off, + tnum_const(val)); + break; case BPF_JGT: false_reg->umax_value = min(false_reg->umax_value, val); true_reg->umin_value = max(true_reg->umin_value, val + 1); @@ -3944,6 +4028,13 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, */ __mark_reg_known(false_reg, val); break; + case BPF_JSET: + false_reg->var_off = tnum_and(false_reg->var_off, + tnum_const(~val)); + if (is_power_of_2(val)) + true_reg->var_off = tnum_or(true_reg->var_off, + tnum_const(val)); + break; case BPF_JGT: true_reg->umax_value = min(true_reg->umax_value, val - 1); false_reg->umin_value = max(false_reg->umin_value, val); @@ -4561,6 +4652,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) return 0; if (w < 0 || w >= env->prog->len) { + verbose_linfo(env, t, "%d: ", t); verbose(env, "jump out of range from insn %d to %d\n", t, w); return -EINVAL; } @@ -4578,6 +4670,8 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) insn_stack[cur_stack++] = w; return 1; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { + verbose_linfo(env, t, "%d: ", t); + verbose_linfo(env, w, "%d: ", w); verbose(env, "back-edge from insn %d to %d\n", t, w); return -EINVAL; } else if (insn_state[w] == EXPLORED) { @@ -4600,10 +4694,6 @@ static int check_cfg(struct bpf_verifier_env *env) int ret = 0; int i, t; - ret = check_subprogs(env); - if (ret < 0) - return ret; - insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) return -ENOMEM; @@ -4910,8 +5000,16 @@ static int check_btf_line(struct bpf_verifier_env *env, goto err_free; } - if (!btf_name_offset_valid(btf, linfo[i].line_off) || - !btf_name_offset_valid(btf, linfo[i].file_name_off)) { + if (!prog->insnsi[linfo[i].insn_off].code) { + verbose(env, + "Invalid insn code at line_info[%u].insn_off\n", + i); + err = -EINVAL; + goto err_free; + } + + if (!btf_name_by_offset(btf, linfo[i].line_off) || + !btf_name_by_offset(btf, linfo[i].file_name_off)) { verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i); err = -EINVAL; goto err_free; @@ -5021,6 +5119,102 @@ static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) return false; } +static void clean_func_state(struct bpf_verifier_env *env, + struct bpf_func_state *st) +{ + enum bpf_reg_liveness live; + int i, j; + + for (i = 0; i < BPF_REG_FP; i++) { + live = st->regs[i].live; + /* liveness must not touch this register anymore */ + st->regs[i].live |= REG_LIVE_DONE; + if (!(live & REG_LIVE_READ)) + /* since the register is unused, clear its state + * to make further comparison simpler + */ + __mark_reg_not_init(&st->regs[i]); + } + + for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) { + live = st->stack[i].spilled_ptr.live; + /* liveness must not touch this stack slot anymore */ + st->stack[i].spilled_ptr.live |= REG_LIVE_DONE; + if (!(live & REG_LIVE_READ)) { + __mark_reg_not_init(&st->stack[i].spilled_ptr); + for (j = 0; j < BPF_REG_SIZE; j++) + st->stack[i].slot_type[j] = STACK_INVALID; + } + } +} + +static void clean_verifier_state(struct bpf_verifier_env *env, + struct bpf_verifier_state *st) +{ + int i; + + if (st->frame[0]->regs[0].live & REG_LIVE_DONE) + /* all regs in this state in all frames were already marked */ + return; + + for (i = 0; i <= st->curframe; i++) + clean_func_state(env, st->frame[i]); +} + +/* the parentage chains form a tree. + * the verifier states are added to state lists at given insn and + * pushed into state stack for future exploration. + * when the verifier reaches bpf_exit insn some of the verifer states + * stored in the state lists have their final liveness state already, + * but a lot of states will get revised from liveness point of view when + * the verifier explores other branches. + * Example: + * 1: r0 = 1 + * 2: if r1 == 100 goto pc+1 + * 3: r0 = 2 + * 4: exit + * when the verifier reaches exit insn the register r0 in the state list of + * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch + * of insn 2 and goes exploring further. At the insn 4 it will walk the + * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ. + * + * Since the verifier pushes the branch states as it sees them while exploring + * the program the condition of walking the branch instruction for the second + * time means that all states below this branch were already explored and + * their final liveness markes are already propagated. + * Hence when the verifier completes the search of state list in is_state_visited() + * we can call this clean_live_states() function to mark all liveness states + * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' + * will not be used. + * This function also clears the registers and stack for states that !READ + * to simplify state merging. + * + * Important note here that walking the same branch instruction in the callee + * doesn't meant that the states are DONE. The verifier has to compare + * the callsites + */ +static void clean_live_states(struct bpf_verifier_env *env, int insn, + struct bpf_verifier_state *cur) +{ + struct bpf_verifier_state_list *sl; + int i; + + sl = env->explored_states[insn]; + if (!sl) + return; + + while (sl != STATE_LIST_MARK) { + if (sl->state.curframe != cur->curframe) + goto next; + for (i = 0; i <= cur->curframe; i++) + if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) + goto next; + clean_verifier_state(env, &sl->state); +next: + sl = sl->next; + } +} + /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, struct idpair *idmap) @@ -5134,12 +5328,6 @@ static bool stacksafe(struct bpf_func_state *old, { int i, spi; - /* if explored stack has more populated slots than current stack - * such stacks are not equivalent - */ - if (old->allocated_stack > cur->allocated_stack) - return false; - /* walk slots of the explored stack and ignore any additional * slots in the current stack, since explored(safe) state * didn't use them @@ -5147,12 +5335,21 @@ static bool stacksafe(struct bpf_func_state *old, for (i = 0; i < old->allocated_stack; i++) { spi = i / BPF_REG_SIZE; - if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) + if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) { + i += BPF_REG_SIZE - 1; /* explored state didn't use this */ continue; + } if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; + + /* explored stack has more populated slots than current stack + * and these slots were used + */ + if (i >= cur->allocated_stack) + return false; + /* if old state was safe with misc data in the stack * it will be safe with zero-initialized stack. * The opposite is not true @@ -5336,6 +5533,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) */ return 0; + clean_live_states(env, insn_idx, cur); + while (sl != STATE_LIST_MARK) { if (states_equal(env, &sl->state, cur)) { /* reached equivalent register/stack state, @@ -5455,6 +5654,8 @@ static int do_check(struct bpf_verifier_env *env) int insn_processed = 0; bool do_print_state = false; + env->prev_linfo = NULL; + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); if (!state) return -ENOMEM; @@ -5528,6 +5729,7 @@ static int do_check(struct bpf_verifier_env *env) .private_data = env, }; + verbose_linfo(env, insn_idx, "; "); verbose(env, "%d: ", insn_idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); } @@ -6762,7 +6964,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); - ret = check_cfg(env); + ret = check_subprogs(env); if (ret < 0) goto skip_full_check; @@ -6770,6 +6972,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, if (ret < 0) goto skip_full_check; + ret = check_cfg(env); + if (ret < 0) + goto skip_full_check; + ret = do_check(env); if (env->cur_state) { free_verifier_state(env->cur_state, true); @@ -6784,10 +6990,11 @@ skip_full_check: free_states(env); if (ret == 0) - sanitize_dead_code(env); + ret = check_max_stack_depth(env); + /* instruction rewrites happen after this point */ if (ret == 0) - ret = check_max_stack_depth(env); + sanitize_dead_code(env); if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ diff --git a/kernel/module.c b/kernel/module.c index 49a405891587..06ec68f08387 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3093,6 +3093,11 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->tracepoints_ptrs), &mod->num_tracepoints); #endif +#ifdef CONFIG_BPF_EVENTS + mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map", + sizeof(*mod->bpf_raw_events), + &mod->num_bpf_raw_events); +#endif #ifdef HAVE_JUMP_LABEL mod->jump_entries = section_objs(info, "__jump_table", sizeof(*mod->jump_entries), diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9864a35c8bb5..9ddb6fddb4e0 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -17,6 +17,43 @@ #include "trace_probe.h" #include "trace.h" +#ifdef CONFIG_MODULES +struct bpf_trace_module { + struct module *module; + struct list_head list; +}; + +static LIST_HEAD(bpf_trace_modules); +static DEFINE_MUTEX(bpf_module_mutex); + +static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) +{ + struct bpf_raw_event_map *btp, *ret = NULL; + struct bpf_trace_module *btm; + unsigned int i; + + mutex_lock(&bpf_module_mutex); + list_for_each_entry(btm, &bpf_trace_modules, list) { + for (i = 0; i < btm->module->num_bpf_raw_events; ++i) { + btp = &btm->module->bpf_raw_events[i]; + if (!strcmp(btp->tp->name, name)) { + if (try_module_get(btm->module)) + ret = btp; + goto out; + } + } + } +out: + mutex_unlock(&bpf_module_mutex); + return ret; +} +#else +static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name) +{ + return NULL; +} +#endif /* CONFIG_MODULES */ + u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -1076,7 +1113,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) extern struct bpf_raw_event_map __start__bpf_raw_tp[]; extern struct bpf_raw_event_map __stop__bpf_raw_tp[]; -struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name) +struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name) { struct bpf_raw_event_map *btp = __start__bpf_raw_tp; @@ -1084,7 +1121,16 @@ struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name) if (!strcmp(btp->tp->name, name)) return btp; } - return NULL; + + return bpf_get_raw_tracepoint_module(name); +} + +void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp) +{ + struct module *mod = __module_address((unsigned long)btp); + + if (mod) + module_put(mod); } static __always_inline @@ -1222,3 +1268,52 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, return err; } + +#ifdef CONFIG_MODULES +int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module) +{ + struct bpf_trace_module *btm, *tmp; + struct module *mod = module; + + if (mod->num_bpf_raw_events == 0 || + (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) + return 0; + + mutex_lock(&bpf_module_mutex); + + switch (op) { + case MODULE_STATE_COMING: + btm = kzalloc(sizeof(*btm), GFP_KERNEL); + if (btm) { + btm->module = module; + list_add(&btm->list, &bpf_trace_modules); + } + break; + case MODULE_STATE_GOING: + list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) { + if (btm->module == module) { + list_del(&btm->list); + kfree(btm); + break; + } + } + break; + } + + mutex_unlock(&bpf_module_mutex); + + return 0; +} + +static struct notifier_block bpf_module_nb = { + .notifier_call = bpf_event_notify, +}; + +int __init bpf_event_init(void) +{ + register_module_notifier(&bpf_module_nb); + return 0; +} + +fs_initcall(bpf_event_init); +#endif /* CONFIG_MODULES */ |