From eb3f595dab40b61011c5f123507a7db2df6f0e65 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:55:58 -0700 Subject: bpf: btf: Validate type reference After collecting all btf_type in the first pass in an earlier patch, the second pass (in this patch) can validate the reference types (e.g. the referring type does exist and it does not refer to itself). While checking the reference type, it also gathers other information (e.g. the size of an array). This info will be useful in checking the struct's members in a later patch. They will also be useful in doing pretty print later. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/btf.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 include/linux/btf.h (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h new file mode 100644 index 000000000000..f14b60368753 --- /dev/null +++ b/include/linux/btf.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ + +#ifndef _LINUX_BTF_H +#define _LINUX_BTF_H 1 + +#include <linux/types.h> + +struct btf; +struct btf_type; + +/* Figure out the size of a type_id. If type_id is a modifier + * (e.g. const), it will be resolved to find out the type with size. + * + * For example: + * In describing "const void *", type_id is "const" and "const" + * refers to "void *". The return type will be "void *". + * + * If type_id is a simple "int", then return type will be "int". + * + * @btf: struct btf object + * @type_id: Find out the size of type_id. The type_id of the return + * type is set to *type_id. + * @ret_size: It can be NULL. If not NULL, the size of the return + * type is set to *ret_size. + * Return: The btf_type (resolved to another type with size info if needed). + * NULL is returned if type_id itself does not have size info + * (e.g. void) or it cannot be resolved to another type that + * has size info. 
+ * *type_id and *ret_size will not be changed in the + * NULL return case. + */ +const struct btf_type *btf_type_id_size(const struct btf *btf, + u32 *type_id, + u32 *ret_size); + +#endif -- cgit v1.3-8-gc7d7 From b00b8daec828dd59af7d1f7a42acd6e5867f80c6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:56:00 -0700 Subject: bpf: btf: Add pretty print capability for data with BTF type info This patch adds pretty print capability for data with BTF type info. The current usage is to allow pretty print for a BPF map. The next few patches will allow a read() on a pinned map with BTF type info for its key and value. This patch uses the seq_printf() infra. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/btf.h | 2 + kernel/bpf/btf.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index f14b60368753..d8bdab0280ba 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -33,5 +33,7 @@ struct btf_type; const struct btf_type *btf_type_id_size(const struct btf *btf, u32 *type_id, u32 *ret_size); +void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, + struct seq_file *m); #endif diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 4e31249f6c61..10ee41589da2 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -256,6 +257,9 @@ struct btf_kind_operations { const struct btf_type *member_type); void (*log_details)(struct btf_verifier_env *env, const struct btf_type *t); + void (*seq_show)(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offsets, + struct seq_file *m); }; static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS]; @@ -781,6 +785,13 @@ static int btf_df_resolve(struct btf_verifier_env *env, return -EINVAL; } 
+static void btf_df_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offsets, + struct seq_file *m) +{ + seq_printf(m, "<unsupported kind:%u>", BTF_INFO_KIND(t->info)); +} + static int btf_int_check_member(struct btf_verifier_env *env, const struct btf_type *struct_type, const struct btf_member *member, @@ -879,11 +890,96 @@ static void btf_int_log(struct btf_verifier_env *env, btf_int_encoding_str(BTF_INT_ENCODING(int_data))); } +static void btf_int_bits_seq_show(const struct btf *btf, + const struct btf_type *t, + void *data, u8 bits_offset, + struct seq_file *m) +{ + u32 int_data = btf_type_int(t); + u16 nr_bits = BTF_INT_BITS(int_data); + u16 total_bits_offset; + u16 nr_copy_bytes; + u16 nr_copy_bits; + u8 nr_upper_bits; + union { + u64 u64_num; + u8 u8_nums[8]; + } print_num; + + total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); + data += BITS_ROUNDDOWN_BYTES(total_bits_offset); + bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset); + nr_copy_bits = nr_bits + bits_offset; + nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits); + + print_num.u64_num = 0; + memcpy(&print_num.u64_num, data, nr_copy_bytes); + + /* Ditch the higher order bits */ + nr_upper_bits = BITS_PER_BYTE_MASKED(nr_copy_bits); + if (nr_upper_bits) { + /* We need to mask out some bits of the upper byte. 
*/ + u8 mask = (1 << nr_upper_bits) - 1; + + print_num.u8_nums[nr_copy_bytes - 1] &= mask; + } + + print_num.u64_num >>= bits_offset; + + seq_printf(m, "0x%llx", print_num.u64_num); +} + +static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + u32 int_data = btf_type_int(t); + u8 encoding = BTF_INT_ENCODING(int_data); + bool sign = encoding & BTF_INT_SIGNED; + u32 nr_bits = BTF_INT_BITS(int_data); + + if (bits_offset || BTF_INT_OFFSET(int_data) || + BITS_PER_BYTE_MASKED(nr_bits)) { + btf_int_bits_seq_show(btf, t, data, bits_offset, m); + return; + } + + switch (nr_bits) { + case 64: + if (sign) + seq_printf(m, "%lld", *(s64 *)data); + else + seq_printf(m, "%llu", *(u64 *)data); + break; + case 32: + if (sign) + seq_printf(m, "%d", *(s32 *)data); + else + seq_printf(m, "%u", *(u32 *)data); + break; + case 16: + if (sign) + seq_printf(m, "%d", *(s16 *)data); + else + seq_printf(m, "%u", *(u16 *)data); + break; + case 8: + if (sign) + seq_printf(m, "%d", *(s8 *)data); + else + seq_printf(m, "%u", *(u8 *)data); + break; + default: + btf_int_bits_seq_show(btf, t, data, bits_offset, m); + } +} + static const struct btf_kind_operations int_ops = { .check_meta = btf_int_check_meta, .resolve = btf_df_resolve, .check_member = btf_int_check_member, .log_details = btf_int_log, + .seq_show = btf_int_seq_show, }; static int btf_modifier_check_member(struct btf_verifier_env *env, @@ -1054,6 +1150,24 @@ resolved: return 0; } +static void btf_modifier_seq_show(const struct btf *btf, + const struct btf_type *t, + u32 type_id, void *data, + u8 bits_offset, struct seq_file *m) +{ + t = btf_type_id_resolve(btf, &type_id); + + btf_type_ops(t)->seq_show(btf, t, type_id, data, bits_offset, m); +} + +static void btf_ptr_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + /* It is a hashed value */ + seq_printf(m, "%p", 
*(void **)data); +} + static void btf_ref_type_log(struct btf_verifier_env *env, const struct btf_type *t) { @@ -1065,6 +1179,7 @@ static struct btf_kind_operations modifier_ops = { .resolve = btf_modifier_resolve, .check_member = btf_modifier_check_member, .log_details = btf_ref_type_log, + .seq_show = btf_modifier_seq_show, }; static struct btf_kind_operations ptr_ops = { @@ -1072,6 +1187,7 @@ static struct btf_kind_operations ptr_ops = { .resolve = btf_ptr_resolve, .check_member = btf_ptr_check_member, .log_details = btf_ref_type_log, + .seq_show = btf_ptr_seq_show, }; static struct btf_kind_operations fwd_ops = { @@ -1079,6 +1195,7 @@ static struct btf_kind_operations fwd_ops = { .resolve = btf_df_resolve, .check_member = btf_df_check_member, .log_details = btf_ref_type_log, + .seq_show = btf_df_seq_show, }; static int btf_array_check_member(struct btf_verifier_env *env, @@ -1209,11 +1326,36 @@ static void btf_array_log(struct btf_verifier_env *env, array->type, array->index_type, array->nelems); } +static void btf_array_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + const struct btf_array *array = btf_type_array(t); + const struct btf_kind_operations *elem_ops; + const struct btf_type *elem_type; + u32 i, elem_size, elem_type_id; + + elem_type_id = array->type; + elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size); + elem_ops = btf_type_ops(elem_type); + seq_puts(m, "["); + for (i = 0; i < array->nelems; i++) { + if (i) + seq_puts(m, ","); + + elem_ops->seq_show(btf, elem_type, elem_type_id, data, + bits_offset, m); + data += elem_size; + } + seq_puts(m, "]"); +} + static struct btf_kind_operations array_ops = { .check_meta = btf_array_check_meta, .resolve = btf_array_resolve, .check_member = btf_array_check_member, .log_details = btf_array_log, + .seq_show = btf_array_seq_show, }; static int btf_struct_check_member(struct btf_verifier_env *env, @@ -1361,11 +1503,39 @@ 
static void btf_struct_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } +static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + const char *seq = BTF_INFO_KIND(t->info) == BTF_KIND_UNION ? "|" : ","; + const struct btf_member *member; + u32 i; + + seq_puts(m, "{"); + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + u32 member_offset = member->offset; + u32 bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset); + u8 bits8_offset = BITS_PER_BYTE_MASKED(member_offset); + const struct btf_kind_operations *ops; + + if (i) + seq_puts(m, seq); + + ops = btf_type_ops(member_type); + ops->seq_show(btf, member_type, member->type, + data + bytes_offset, bits8_offset, m); + } + seq_puts(m, "}"); +} + static struct btf_kind_operations struct_ops = { .check_meta = btf_struct_check_meta, .resolve = btf_struct_resolve, .check_member = btf_struct_check_member, .log_details = btf_struct_log, + .seq_show = btf_struct_seq_show, }; static int btf_enum_check_member(struct btf_verifier_env *env, @@ -1441,11 +1611,31 @@ static void btf_enum_log(struct btf_verifier_env *env, btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); } +static void btf_enum_seq_show(const struct btf *btf, const struct btf_type *t, + u32 type_id, void *data, u8 bits_offset, + struct seq_file *m) +{ + const struct btf_enum *enums = btf_type_enum(t); + u32 i, nr_enums = btf_type_vlen(t); + int v = *(int *)data; + + for (i = 0; i < nr_enums; i++) { + if (v == enums[i].val) { + seq_printf(m, "%s", + btf_name_by_offset(btf, enums[i].name)); + return; + } + } + + seq_printf(m, "%d", v); +} + static struct btf_kind_operations enum_ops = { .check_meta = btf_enum_check_meta, .resolve = btf_df_resolve, .check_member = btf_enum_check_member, .log_details = btf_enum_log, + .seq_show = btf_enum_seq_show, 
}; static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = { @@ -1782,3 +1972,11 @@ errout: btf_free(btf); return ERR_PTR(err); } + +void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, + struct seq_file *m) +{ + const struct btf_type *t = btf_type_by_id(btf, type_id); + + btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m); +} -- cgit v1.3-8-gc7d7 From f56a653c1fd13a197076dec4461c656fd2adec73 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:56:01 -0700 Subject: bpf: btf: Add BPF_BTF_LOAD command This patch adds a BPF_BTF_LOAD command which 1) loads and verifies the BTF (implemented in earlier patches) 2) returns a BTF fd to userspace. In the next patch, the BTF fd can be specified during BPF_MAP_CREATE. It currently limits to CAP_SYS_ADMIN. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/btf.h | 4 +++ include/uapi/linux/bpf.h | 9 +++++++ kernel/bpf/btf.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 17 ++++++++++++ 4 files changed, 97 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index d8bdab0280ba..a7c7072535ea 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -8,7 +8,11 @@ struct btf; struct btf_type; +union bpf_attr; +void btf_put(struct btf *btf); +int btf_new_fd(const union bpf_attr *attr); +struct btf *btf_get_by_fd(int fd); /* Figure out the size of a type_id. If type_id is a modifier * (e.g. const), it will be resolved to find out the type with size. 
* diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9a2d1a04eb24..795bcd577750 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -95,6 +95,7 @@ enum bpf_cmd { BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, }; enum bpf_map_type { @@ -363,6 +364,14 @@ union bpf_attr { __u64 name; __u32 prog_fd; } raw_tracepoint; + + struct { /* anonymous struct for BPF_BTF_LOAD */ + __aligned_u64 btf; + __aligned_u64 btf_log_buf; + __u32 btf_size; + __u32 btf_log_size; + __u32 btf_log_level; + }; } __attribute__((aligned(8))); /* BPF helper function descriptions: diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 10ee41589da2..2322340694cf 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include #include @@ -190,6 +192,7 @@ struct btf { u32 nr_types; u32 types_size; u32 data_size; + refcount_t refcnt; }; enum verifier_phase { @@ -604,6 +607,17 @@ static void btf_free(struct btf *btf) kfree(btf); } +static void btf_get(struct btf *btf) +{ + refcount_inc(&btf->refcnt); +} + +void btf_put(struct btf *btf) +{ + if (btf && refcount_dec_and_test(&btf->refcnt)) + btf_free(btf); +} + static int env_resolve_init(struct btf_verifier_env *env) { struct btf *btf = env->btf; @@ -1963,6 +1977,7 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, if (!err) { btf_verifier_env_free(env); + btf_get(btf); return btf; } @@ -1980,3 +1995,55 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, btf_type_ops(t)->seq_show(btf, t, type_id, obj, 0, m); } + +static int btf_release(struct inode *inode, struct file *filp) +{ + btf_put(filp->private_data); + return 0; +} + +static const struct file_operations btf_fops = { + .release = btf_release, +}; + +int btf_new_fd(const union bpf_attr *attr) +{ + struct btf *btf; + int fd; + + btf = btf_parse(u64_to_user_ptr(attr->btf), + attr->btf_size, attr->btf_log_level, + 
u64_to_user_ptr(attr->btf_log_buf), + attr->btf_log_size); + if (IS_ERR(btf)) + return PTR_ERR(btf); + + fd = anon_inode_getfd("btf", &btf_fops, btf, + O_RDONLY | O_CLOEXEC); + if (fd < 0) + btf_put(btf); + + return fd; +} + +struct btf *btf_get_by_fd(int fd) +{ + struct btf *btf; + struct fd f; + + f = fdget(fd); + + if (!f.file) + return ERR_PTR(-EBADF); + + if (f.file->f_op != &btf_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + btf = f.file->private_data; + btf_get(btf); + fdput(f); + + return btf; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4ca46df19c9a..cd8ebadc66eb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include @@ -2023,6 +2024,19 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, return err; } +#define BPF_BTF_LOAD_LAST_FIELD btf_log_level + +static int bpf_btf_load(const union bpf_attr *attr) +{ + if (CHECK_ATTR(BPF_BTF_LOAD)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return btf_new_fd(attr); +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -2103,6 +2117,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_RAW_TRACEPOINT_OPEN: err = bpf_raw_tracepoint_open(&attr); break; + case BPF_BTF_LOAD: + err = bpf_btf_load(&attr); + break; default: err = -EINVAL; break; -- cgit v1.3-8-gc7d7 From 60197cfb6e11ffc03aa0ed23765b2f7e70b2e2d4 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:56:02 -0700 Subject: bpf: btf: Add BPF_OBJ_GET_INFO_BY_FD support to BTF fd This patch adds BPF_OBJ_GET_INFO_BY_FD support to BTF fd. The original BTF data, which was used to create the BTF fd during the earlier BPF_BTF_LOAD call, will be returned. The userspace is expected to allocate buffer to info.info and the buffer size is set to info.info_len before calling BPF_OBJ_GET_INFO_BY_FD. 
The original BTF data is copied to the userspace buffer (info.info). Only up to the user's specified info.info_len will be copied. The original BTF data size is set to info.info_len. The userspace needs to check if it is bigger than its allocated buffer size. If it is, the userspace should realloc with the kernel-returned info.info_len and call the BPF_OBJ_GET_INFO_BY_FD again. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/btf.h | 5 +++++ kernel/bpf/btf.c | 17 ++++++++++++++++- kernel/bpf/syscall.c | 2 ++ 3 files changed, 23 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index a7c7072535ea..a966dc6d61ee 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -10,9 +10,14 @@ struct btf; struct btf_type; union bpf_attr; +extern const struct file_operations btf_fops; + void btf_put(struct btf *btf); int btf_new_fd(const union bpf_attr *attr); struct btf *btf_get_by_fd(int fd); +int btf_get_info_by_fd(const struct btf *btf, + const union bpf_attr *attr, + union bpf_attr __user *uattr); /* Figure out the size of a type_id. If type_id is a modifier * (e.g. const), it will be resolved to find out the type with size. 
* diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 2322340694cf..eb56ac760547 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2002,7 +2002,7 @@ static int btf_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations btf_fops = { +const struct file_operations btf_fops = { .release = btf_release, }; @@ -2047,3 +2047,18 @@ struct btf *btf_get_by_fd(int fd) return btf; } + +int btf_get_info_by_fd(const struct btf *btf, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + void __user *udata = u64_to_user_ptr(attr->info.info); + u32 copy_len = min_t(u32, btf->data_size, + attr->info.info_len); + + if (copy_to_user(udata, btf->data, copy_len) || + put_user(btf->data_size, &uattr->info.info_len)) + return -EFAULT; + + return 0; +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cd8ebadc66eb..0a4924a0a8da 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2017,6 +2017,8 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, else if (f.file->f_op == &bpf_map_fops) err = bpf_map_get_info_by_fd(f.file->private_data, attr, uattr); + else if (f.file->f_op == &btf_fops) + err = btf_get_info_by_fd(f.file->private_data, attr, uattr); else err = -EINVAL; -- cgit v1.3-8-gc7d7 From a26ca7c982cb576749cbdd01e8ecde4bf010d60a Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 18 Apr 2018 15:56:03 -0700 Subject: bpf: btf: Add pretty print support to the basic arraymap This patch adds pretty print support to the basic arraymap. Support for other bpf maps can be added later. This patch adds new attrs to the BPF_MAP_CREATE command to allow specifying the btf_fd, btf_key_id and btf_value_id. The BPF_MAP_CREATE can then associate the btf to the map if the creating map supports BTF. A BTF supported map needs to implement two new map ops, map_seq_show_elem() and map_check_btf(). This patch has implemented these new map ops for the basic arraymap. 
It also adds file_operations, bpffs_map_fops, to the pinned map such that the pinned map can be opened and read. After that, the user has an intuitive way to do "cat bpffs/pathto/a-pinned-map" instead of getting an error. bpffs_map_fops should not be extended further to support other operations. Other operations (e.g. write/key-lookup...) should be realized by the userspace tools (e.g. bpftool) through the BPF_OBJ_GET_INFO_BY_FD, map's lookup/update interface...etc. Follow up patches will allow the userspace to obtain the BTF from a map-fd. Here is a sample output when reading a pinned arraymap with the following map's value: struct map_value { int count_a; int count_b; }; cat /sys/fs/bpf/pinned_array_map: 0: {1,2} 1: {3,4} 2: {5,6} ... Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 20 +++++- include/uapi/linux/bpf.h | 3 + kernel/bpf/arraymap.c | 50 +++++++++++++++ kernel/bpf/inode.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++- kernel/bpf/syscall.c | 32 +++++++++- 5 files changed, 254 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 95a7abd0ee92..ee5275e7d4df 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -22,6 +22,8 @@ struct perf_event; struct bpf_prog; struct bpf_map; struct sock; +struct seq_file; +struct btf; /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { @@ -43,10 +45,14 @@ struct bpf_map_ops { void (*map_fd_put_ptr)(void *ptr); u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); u32 (*map_fd_sys_lookup_elem)(void *ptr); + void (*map_seq_show_elem)(struct bpf_map *map, void *key, + struct seq_file *m); + int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, + u32 key_type_id, u32 value_type_id); }; struct bpf_map { - /* 1st cacheline with read-mostly members of which some + /* The first two cachelines with 
read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). */ const struct bpf_map_ops *ops ____cacheline_aligned; @@ -62,10 +68,13 @@ struct bpf_map { u32 pages; u32 id; int numa_node; + u32 btf_key_id; + u32 btf_value_id; + struct btf *btf; bool unpriv_array; - /* 7 bytes hole */ + /* 55 bytes hole */ - /* 2nd cacheline with misc members to avoid false sharing + /* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. */ struct user_struct *user ____cacheline_aligned; @@ -100,6 +109,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map) return container_of(map, struct bpf_offloaded_map, map); } +static inline bool bpf_map_support_seq_show(const struct bpf_map *map) +{ + return map->ops->map_seq_show_elem && map->ops->map_check_btf; +} + extern const struct bpf_map_ops bpf_map_offload_ops; /* function argument constraints */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 795bcd577750..c8383a289f7b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -280,6 +280,9 @@ union bpf_attr { */ char map_name[BPF_OBJ_NAME_LEN]; __u32 map_ifindex; /* ifindex of netdev to create on */ + __u32 btf_fd; /* fd pointing to a BTF type data */ + __u32 btf_key_id; /* BTF type_id of the key */ + __u32 btf_value_id; /* BTF type_id of the value */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 14750e7c5ee4..02a189339381 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -11,11 +11,13 @@ * General Public License for more details. 
*/ #include +#include #include #include #include #include #include +#include #include "map_in_map.h" @@ -336,6 +338,52 @@ static void array_map_free(struct bpf_map *map) bpf_map_area_free(array); } +static void array_map_seq_show_elem(struct bpf_map *map, void *key, + struct seq_file *m) +{ + void *value; + + rcu_read_lock(); + + value = array_map_lookup_elem(map, key); + if (!value) { + rcu_read_unlock(); + return; + } + + seq_printf(m, "%u: ", *(u32 *)key); + btf_type_seq_show(map->btf, map->btf_value_id, value, m); + seq_puts(m, "\n"); + + rcu_read_unlock(); +} + +static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, + u32 btf_key_id, u32 btf_value_id) +{ + const struct btf_type *key_type, *value_type; + u32 key_size, value_size; + u32 int_data; + + key_type = btf_type_id_size(btf, &btf_key_id, &key_size); + if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) + return -EINVAL; + + int_data = *(u32 *)(key_type + 1); + /* bpf array can only take a u32 key. This check makes + * sure that the btf matches the attr used during map_create. 
+ */ + if (BTF_INT_BITS(int_data) != 32 || key_size != 4 || + BTF_INT_OFFSET(int_data)) + return -EINVAL; + + value_type = btf_type_id_size(btf, &btf_value_id, &value_size); + if (!value_type || value_size > map->value_size) + return -EINVAL; + + return 0; +} + const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, @@ -345,6 +393,8 @@ const struct bpf_map_ops array_map_ops = { .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, .map_gen_lookup = array_map_gen_lookup, + .map_seq_show_elem = array_map_seq_show_elem, + .map_check_btf = array_map_check_btf, }; const struct bpf_map_ops percpu_array_map_ops = { diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index bf6da59ae0d0..a41343009ccc 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -150,8 +150,154 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; } +struct map_iter { + void *key; + bool done; +}; + +static struct map_iter *map_iter(struct seq_file *m) +{ + return m->private; +} + +static struct bpf_map *seq_file_to_map(struct seq_file *m) +{ + return file_inode(m->file)->i_private; +} + +static void map_iter_free(struct map_iter *iter) +{ + if (iter) { + kfree(iter->key); + kfree(iter); + } +} + +static struct map_iter *map_iter_alloc(struct bpf_map *map) +{ + struct map_iter *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL | __GFP_NOWARN); + if (!iter) + goto error; + + iter->key = kzalloc(map->key_size, GFP_KERNEL | __GFP_NOWARN); + if (!iter->key) + goto error; + + return iter; + +error: + map_iter_free(iter); + return NULL; +} + +static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct bpf_map *map = seq_file_to_map(m); + void *key = map_iter(m)->key; + + if (map_iter(m)->done) + return NULL; + + if (unlikely(v == SEQ_START_TOKEN)) + goto done; + + if (map->ops->map_get_next_key(map, key, key)) { + map_iter(m)->done = true; + return NULL; 
+ } + +done: + ++(*pos); + return key; +} + +static void *map_seq_start(struct seq_file *m, loff_t *pos) +{ + if (map_iter(m)->done) + return NULL; + + return *pos ? map_iter(m)->key : SEQ_START_TOKEN; +} + +static void map_seq_stop(struct seq_file *m, void *v) +{ +} + +static int map_seq_show(struct seq_file *m, void *v) +{ + struct bpf_map *map = seq_file_to_map(m); + void *key = map_iter(m)->key; + + if (unlikely(v == SEQ_START_TOKEN)) { + seq_puts(m, "# WARNING!! The output is for debug purpose only\n"); + seq_puts(m, "# WARNING!! The output format will change\n"); + } else { + map->ops->map_seq_show_elem(map, key, m); + } + + return 0; +} + +static const struct seq_operations bpffs_map_seq_ops = { + .start = map_seq_start, + .next = map_seq_next, + .show = map_seq_show, + .stop = map_seq_stop, +}; + +static int bpffs_map_open(struct inode *inode, struct file *file) +{ + struct bpf_map *map = inode->i_private; + struct map_iter *iter; + struct seq_file *m; + int err; + + iter = map_iter_alloc(map); + if (!iter) + return -ENOMEM; + + err = seq_open(file, &bpffs_map_seq_ops); + if (err) { + map_iter_free(iter); + return err; + } + + m = file->private_data; + m->private = iter; + + return 0; +} + +static int bpffs_map_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + + map_iter_free(map_iter(m)); + + return seq_release(inode, file); +} + +/* bpffs_map_fops should only implement the basic + * read operation for a BPF map. The purpose is to + * provide a simple user intuitive way to do + * "cat bpffs/pathto/a-pinned-map". + * + * Other operations (e.g. write, lookup...) should be realized by + * the userspace tools (e.g. bpftool) through the + * BPF_OBJ_GET_INFO_BY_FD and the map's lookup/update + * interface. 
+ */ +static const struct file_operations bpffs_map_fops = { + .open = bpffs_map_open, + .read = seq_read, + .release = bpffs_map_release, +}; + static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, - const struct inode_operations *iops) + const struct inode_operations *iops, + const struct file_operations *fops) { struct inode *dir = dentry->d_parent->d_inode; struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode); @@ -159,6 +305,7 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, return PTR_ERR(inode); inode->i_op = iops; + inode->i_fop = fops; inode->i_private = raw; bpf_dentry_finalize(dentry, inode, dir); @@ -167,12 +314,15 @@ static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw, static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg) { - return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops); + return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops, NULL); } static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) { - return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops); + struct bpf_map *map = arg; + + return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, + map->btf ? 
&bpffs_map_fops : NULL); } static struct dentry * diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0a4924a0a8da..fe23dc5a3ec4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -27,6 +27,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -251,6 +252,7 @@ static void bpf_map_free_deferred(struct work_struct *work) bpf_map_uncharge_memlock(map); security_bpf_map_free(map); + btf_put(map->btf); /* implementation dependent freeing */ map->ops->map_free(map); } @@ -416,7 +418,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) return 0; } -#define BPF_MAP_CREATE_LAST_FIELD map_ifindex +#define BPF_MAP_CREATE_LAST_FIELD btf_value_id /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -450,6 +452,33 @@ static int map_create(union bpf_attr *attr) atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); + if (bpf_map_support_seq_show(map) && + (attr->btf_key_id || attr->btf_value_id)) { + struct btf *btf; + + if (!attr->btf_key_id || !attr->btf_value_id) { + err = -EINVAL; + goto free_map_nouncharge; + } + + btf = btf_get_by_fd(attr->btf_fd); + if (IS_ERR(btf)) { + err = PTR_ERR(btf); + goto free_map_nouncharge; + } + + err = map->ops->map_check_btf(map, btf, attr->btf_key_id, + attr->btf_value_id); + if (err) { + btf_put(btf); + goto free_map_nouncharge; + } + + map->btf = btf; + map->btf_key_id = attr->btf_key_id; + map->btf_value_id = attr->btf_value_id; + } + err = security_bpf_map_alloc(map); if (err) goto free_map_nouncharge; @@ -482,6 +511,7 @@ free_map: free_map_sec: security_bpf_map_free(map); free_map_nouncharge: + btf_put(map->btf); map->ops->map_free(map); return err; } -- cgit v1.3-8-gc7d7