aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/core.c4
-rw-r--r--kernel/bpf/helpers.c55
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/bpf/verifier.c919
-rw-r--r--kernel/events/core.c89
-rw-r--r--kernel/trace/bpf_trace.c160
7 files changed, 906 insertions, 328 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650d7aeb..a2ac051c342f 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
}
late_initcall(register_perf_event_array_map);
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int fd)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 03fd23d4d587..aa6d98154106 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void)
prandom_init_once(&bpf_user_rnd_state);
}
-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_user_rnd_u32)
{
/* Should someone ever have the rather unwise idea to use some
* of the registers passed into this function, then note that
@@ -1031,7 +1031,7 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
state = &get_cpu_var(bpf_user_rnd_state);
res = prandom_u32_state(state);
- put_cpu_var(state);
+ put_cpu_var(bpf_user_rnd_state);
return res;
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ea3afba1a4f..39918402e6e9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
#include <linux/ktime.h>
#include <linux/sched.h>
#include <linux/uidgid.h>
+#include <linux/filter.h>
/* If kernel subsystem is allowing eBPF programs to call this function,
* inside its own verifier_ops->get_func_proto() callback it should return
@@ -26,48 +27,32 @@
* if program is allowed to access maps, so check rcu_read_lock_held in
* all three functions.
*/
-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
- /* verifier checked that R1 contains a valid pointer to bpf_map
- * and R2 points to a program stack and map->key_size bytes were
- * initialized
- */
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
- void *value;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
- value = map->ops->map_lookup_elem(map, key);
-
- /* lookup() returns either pointer to element value or NULL
- * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
- */
- return (unsigned long) value;
+ return (unsigned long) map->ops->map_lookup_elem(map, key);
}
const struct bpf_func_proto bpf_map_lookup_elem_proto = {
.func = bpf_map_lookup_elem,
.gpl_only = false,
+ .pkt_access = true,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
};
-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
+ void *, value, u64, flags)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
- void *value = (void *) (unsigned long) r3;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
- return map->ops->map_update_elem(map, key, value, r4);
+ return map->ops->map_update_elem(map, key, value, flags);
}
const struct bpf_func_proto bpf_map_update_elem_proto = {
.func = bpf_map_update_elem,
.gpl_only = false,
+ .pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
@@ -75,19 +60,16 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
.arg4_type = ARG_ANYTHING,
};
-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
- void *key = (void *) (unsigned long) r2;
-
WARN_ON_ONCE(!rcu_read_lock_held());
-
return map->ops->map_delete_elem(map, key);
}
const struct bpf_func_proto bpf_map_delete_elem_proto = {
.func = bpf_map_delete_elem,
.gpl_only = false,
+ .pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_MAP_KEY,
@@ -99,7 +81,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_smp_processor_id)
{
return smp_processor_id();
}
@@ -110,7 +92,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_ktime_get_ns)
{
/* NMI safe access to clock monotonic */
return ktime_get_mono_fast_ns();
@@ -122,11 +104,11 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_pid_tgid)
{
struct task_struct *task = current;
- if (!task)
+ if (unlikely(!task))
return -EINVAL;
return (u64) task->tgid << 32 | task->pid;
@@ -138,18 +120,18 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_uid_gid)
{
struct task_struct *task = current;
kuid_t uid;
kgid_t gid;
- if (!task)
+ if (unlikely(!task))
return -EINVAL;
current_uid_gid(&uid, &gid);
return (u64) from_kgid(&init_user_ns, gid) << 32 |
- from_kuid(&init_user_ns, uid);
+ from_kuid(&init_user_ns, uid);
}
const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
@@ -158,10 +140,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
.ret_type = RET_INTEGER,
};
-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
{
struct task_struct *task = current;
- char *buf = (char *) (long) r1;
if (unlikely(!task))
goto err_clear;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index bf4495fcd25d..732ae16d12b7 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -116,10 +116,9 @@ free_smap:
return ERR_PTR(err);
}
-u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags)
{
- struct pt_regs *regs = (struct pt_regs *) (long) r1;
- struct bpf_map *map = (struct bpf_map *) (long) r2;
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
struct perf_callchain_entry *trace;
struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index daea765d72e6..99a7e5b388f2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
@@ -126,76 +127,16 @@
* are set to NOT_INIT to indicate that they are no longer readable.
*/
-struct reg_state {
- enum bpf_reg_type type;
- union {
- /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
- s64 imm;
-
- /* valid when type == PTR_TO_PACKET* */
- struct {
- u32 id;
- u16 off;
- u16 range;
- };
-
- /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
- * PTR_TO_MAP_VALUE_OR_NULL
- */
- struct bpf_map *map_ptr;
- };
-};
-
-enum bpf_stack_slot_type {
- STACK_INVALID, /* nothing was stored in this stack slot */
- STACK_SPILL, /* register spilled into stack */
- STACK_MISC /* BPF program wrote some data into this slot */
-};
-
-#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */
-
-/* state of the program:
- * type of all registers and stack info
- */
-struct verifier_state {
- struct reg_state regs[MAX_BPF_REG];
- u8 stack_slot_type[MAX_BPF_STACK];
- struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
-};
-
-/* linked list of verifier states used to prune search */
-struct verifier_state_list {
- struct verifier_state state;
- struct verifier_state_list *next;
-};
-
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
-struct verifier_stack_elem {
+struct bpf_verifier_stack_elem {
/* verifer state is 'st'
* before processing instruction 'insn_idx'
* and after processing instruction 'prev_insn_idx'
*/
- struct verifier_state st;
+ struct bpf_verifier_state st;
int insn_idx;
int prev_insn_idx;
- struct verifier_stack_elem *next;
-};
-
-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
-
-/* single container for all structs
- * one verifier_env per bpf_check() call
- */
-struct verifier_env {
- struct bpf_prog *prog; /* eBPF program being verified */
- struct verifier_stack_elem *head; /* stack of verifier states to be processed */
- int stack_size; /* number of states to be processed */
- struct verifier_state cur_state; /* current verifier state */
- struct verifier_state_list **explored_states; /* search pruning optimization */
- struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
- u32 used_map_cnt; /* number of used maps */
- u32 id_gen; /* used to generate unique reg IDs */
- bool allow_ptr_leaks;
+ struct bpf_verifier_stack_elem *next;
};
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
@@ -204,6 +145,7 @@ struct verifier_env {
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
+ bool pkt_access;
int regno;
int access_size;
};
@@ -240,6 +182,7 @@ static const char * const reg_type_str[] = {
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
+ [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj",
[FRAME_PTR] = "fp",
[PTR_TO_STACK] = "fp",
[CONST_IMM] = "imm",
@@ -247,9 +190,9 @@ static const char * const reg_type_str[] = {
[PTR_TO_PACKET_END] = "pkt_end",
};
-static void print_verifier_state(struct verifier_state *state)
+static void print_verifier_state(struct bpf_verifier_state *state)
{
- struct reg_state *reg;
+ struct bpf_reg_state *reg;
enum bpf_reg_type t;
int i;
@@ -267,10 +210,17 @@ static void print_verifier_state(struct verifier_state *state)
else if (t == UNKNOWN_VALUE && reg->imm)
verbose("%lld", reg->imm);
else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
- t == PTR_TO_MAP_VALUE_OR_NULL)
+ t == PTR_TO_MAP_VALUE_OR_NULL ||
+ t == PTR_TO_MAP_VALUE_ADJ)
verbose("(ks=%d,vs=%d)",
reg->map_ptr->key_size,
reg->map_ptr->value_size);
+ if (reg->min_value != BPF_REGISTER_MIN_RANGE)
+ verbose(",min_value=%llu",
+ (unsigned long long)reg->min_value);
+ if (reg->max_value != BPF_REGISTER_MAX_RANGE)
+ verbose(",max_value=%llu",
+ (unsigned long long)reg->max_value);
}
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] == STACK_SPILL)
@@ -425,9 +375,9 @@ static void print_bpf_insn(struct bpf_insn *insn)
}
}
-static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
{
- struct verifier_stack_elem *elem;
+ struct bpf_verifier_stack_elem *elem;
int insn_idx;
if (env->head == NULL)
@@ -444,12 +394,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
return insn_idx;
}
-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
- int prev_insn_idx)
+static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx)
{
- struct verifier_stack_elem *elem;
+ struct bpf_verifier_stack_elem *elem;
- elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+ elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
if (!elem)
goto err;
@@ -475,13 +425,15 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void init_reg_state(struct reg_state *regs)
+static void init_reg_state(struct bpf_reg_state *regs)
{
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
regs[i].type = NOT_INIT;
regs[i].imm = 0;
+ regs[i].min_value = BPF_REGISTER_MIN_RANGE;
+ regs[i].max_value = BPF_REGISTER_MAX_RANGE;
}
/* frame pointer */
@@ -491,20 +443,26 @@ static void init_reg_state(struct reg_state *regs)
regs[BPF_REG_1].type = PTR_TO_CTX;
}
-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
{
BUG_ON(regno >= MAX_BPF_REG);
regs[regno].type = UNKNOWN_VALUE;
regs[regno].imm = 0;
}
+static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
+{
+ regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
+ regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+}
+
enum reg_arg_type {
SRC_OP, /* register is used as source operand */
DST_OP, /* register is used as destination operand */
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
-static int check_reg_arg(struct reg_state *regs, u32 regno,
+static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
enum reg_arg_type t)
{
if (regno >= MAX_BPF_REG) {
@@ -564,8 +522,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
-static int check_stack_write(struct verifier_state *state, int off, int size,
- int value_regno)
+static int check_stack_write(struct bpf_verifier_state *state, int off,
+ int size, int value_regno)
{
int i;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -590,7 +548,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
} else {
/* regular write of data into stack */
state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
- (struct reg_state) {};
+ (struct bpf_reg_state) {};
for (i = 0; i < size; i++)
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -598,7 +556,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
return 0;
}
-static int check_stack_read(struct verifier_state *state, int off, int size,
+static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
int value_regno)
{
u8 *slot_type;
@@ -639,7 +597,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
}
/* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct verifier_env *env, u32 regno, int off,
+static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -654,24 +612,31 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
#define MAX_PACKET_OFF 0xffff
-static bool may_write_pkt_data(enum bpf_prog_type type)
+static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
+ const struct bpf_call_arg_meta *meta)
{
- switch (type) {
+ switch (env->prog->type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
+ if (meta)
+ return meta->pkt_access;
+
+ env->seen_direct_write = true;
return true;
default:
return false;
}
}
-static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *reg = &regs[regno];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *reg = &regs[regno];
off += reg->off;
- if (off < 0 || off + size > reg->range) {
+ if (off < 0 || size <= 0 || off + size > reg->range) {
verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
off, size, regno, reg->id, reg->off, reg->range);
return -EACCES;
@@ -680,9 +645,13 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
}
/* check access to 'struct bpf_context' fields */
-static int check_ctx_access(struct verifier_env *env, int off, int size,
+static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
+ /* for analyzer ctx accesses are already validated and converted */
+ if (env->analyzer_ops)
+ return 0;
+
if (env->prog->aux->ops->is_valid_access &&
env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
/* remember the offset of last byte accessed in ctx */
@@ -695,7 +664,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
return -EACCES;
}
-static bool is_pointer_value(struct verifier_env *env, int regno)
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
{
if (env->allow_ptr_leaks)
return false;
@@ -709,28 +678,19 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
}
}
-static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
- int off, int size)
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg, int off, int size)
{
- if (reg->type != PTR_TO_PACKET) {
+ if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
if (off % size != 0) {
- verbose("misaligned access off %d size %d\n", off, size);
+ verbose("misaligned access off %d size %d\n",
+ off, size);
return -EACCES;
} else {
return 0;
}
}
- switch (env->prog->type) {
- case BPF_PROG_TYPE_SCHED_CLS:
- case BPF_PROG_TYPE_SCHED_ACT:
- case BPF_PROG_TYPE_XDP:
- break;
- default:
- verbose("verifier is misconfigured\n");
- return -EACCES;
- }
-
if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
/* misaligned access to packet is ok on x86,arm,arm64 */
return 0;
@@ -741,7 +701,8 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
}
/* skb->data is NET_IP_ALIGN-ed */
- if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+ if (reg->type == PTR_TO_PACKET &&
+ (NET_IP_ALIGN + reg->off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
NET_IP_ALIGN, reg->off, off, size);
return -EACCES;
@@ -755,12 +716,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
int bpf_size, enum bpf_access_type t,
int value_regno)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *reg = &state->regs[regno];
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *reg = &state->regs[regno];
int size, err = 0;
if (reg->type == PTR_TO_STACK)
@@ -774,12 +735,52 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
if (err)
return err;
- if (reg->type == PTR_TO_MAP_VALUE) {
+ if (reg->type == PTR_TO_MAP_VALUE ||
+ reg->type == PTR_TO_MAP_VALUE_ADJ) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose("R%d leaks addr into map\n", value_regno);
return -EACCES;
}
+
+ /* If we adjusted the register to this map value at all then we
+ * need to change off and size to min_value and max_value
+ * respectively to make sure our theoretical access will be
+ * safe.
+ */
+ if (reg->type == PTR_TO_MAP_VALUE_ADJ) {
+ if (log_level)
+ print_verifier_state(state);
+ env->varlen_map_value_access = true;
+ /* The minimum value is only important with signed
+ * comparisons where we can't assume the floor of a
+ * value is 0. If we are using signed variables for our
+ * index'es we need to make sure that whatever we use
+ * will have a set floor within our range.
+ */
+ if ((s64)reg->min_value < 0) {
+ verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ regno);
+ return -EACCES;
+ }
+ err = check_map_access(env, regno, reg->min_value + off,
+ size);
+ if (err) {
+ verbose("R%d min value is outside of the array range\n",
+ regno);
+ return err;
+ }
+
+ /* If we haven't set a max value then we need to bail
+ * since we can't be sure we won't do bad things.
+ */
+ if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+ verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
+ regno);
+ return -EACCES;
+ }
+ off += reg->max_value;
+ }
err = check_map_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown_value(state->regs, value_regno);
@@ -795,9 +796,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
err = check_ctx_access(env, off, size, t, &reg_type);
if (!err && t == BPF_READ && value_regno >= 0) {
mark_reg_unknown_value(state->regs, value_regno);
- if (env->allow_ptr_leaks)
- /* note that reg.[id|off|range] == 0 */
- state->regs[value_regno].type = reg_type;
+ /* note that reg.[id|off|range] == 0 */
+ state->regs[value_regno].type = reg_type;
}
} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -817,7 +817,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
err = check_stack_read(state, off, size, value_regno);
}
} else if (state->regs[regno].type == PTR_TO_PACKET) {
- if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) {
+ if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
verbose("cannot write into packet\n");
return -EACCES;
}
@@ -846,9 +846,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
return err;
}
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@@ -882,12 +882,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
* bytes from that pointer, make sure that it's within stack boundary
* and all elements of stack are initialized
*/
-static int check_stack_boundary(struct verifier_env *env, int regno,
+static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
int access_size, bool zero_size_allowed,
struct bpf_call_arg_meta *meta)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs;
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *regs = state->regs;
int off, i;
if (regs[regno].type != PTR_TO_STACK) {
@@ -926,18 +926,18 @@ static int check_stack_boundary(struct verifier_env *env, int regno,
return 0;
}
-static int check_func_arg(struct verifier_env *env, u32 regno,
+static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
enum bpf_arg_type arg_type,
struct bpf_call_arg_meta *meta)
{
- struct reg_state *reg = env->cur_state.regs + regno;
- enum bpf_reg_type expected_type;
+ struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+ enum bpf_reg_type expected_type, type = reg->type;
int err = 0;
if (arg_type == ARG_DONTCARE)
return 0;
- if (reg->type == NOT_INIT) {
+ if (type == NOT_INIT) {
verbose("R%d !read_ok\n", regno);
return -EACCES;
}
@@ -950,16 +950,29 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
return 0;
}
+ if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
+ verbose("helper access to the packet is not allowed\n");
+ return -EACCES;
+ }
+
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
+ if (type != PTR_TO_PACKET && type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_CONST_STACK_SIZE ||
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
expected_type = CONST_IMM;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_CONST_MAP_PTR) {
expected_type = CONST_PTR_TO_MAP;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_PTR_TO_CTX) {
expected_type = PTR_TO_CTX;
+ if (type != expected_type)
+ goto err_type;
} else if (arg_type == ARG_PTR_TO_STACK ||
arg_type == ARG_PTR_TO_RAW_STACK) {
expected_type = PTR_TO_STACK;
@@ -967,20 +980,16 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
* passed in as argument, it's a CONST_IMM type. Final test
* happens during stack boundary checking.
*/
- if (reg->type == CONST_IMM && reg->imm == 0)
- expected_type = CONST_IMM;
+ if (type == CONST_IMM && reg->imm == 0)
+ /* final test in check_stack_boundary() */;
+ else if (type != PTR_TO_PACKET && type != expected_type)
+ goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
} else {
verbose("unsupported arg_type %d\n", arg_type);
return -EFAULT;
}
- if (reg->type != expected_type) {
- verbose("R%d type=%s expected=%s\n", regno,
- reg_type_str[reg->type], reg_type_str[expected_type]);
- return -EACCES;
- }
-
if (arg_type == ARG_CONST_MAP_PTR) {
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
meta->map_ptr = reg->map_ptr;
@@ -998,8 +1007,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("invalid map_ptr to access map->key\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno, meta->map_ptr->key_size,
- false, NULL);
+ if (type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno, 0,
+ meta->map_ptr->key_size);
+ else
+ err = check_stack_boundary(env, regno,
+ meta->map_ptr->key_size,
+ false, NULL);
} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
/* bpf_map_xxx(..., map_ptr, ..., value) call:
* check [value, value + map->value_size) validity
@@ -1009,9 +1023,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("invalid map_ptr to access map->value\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno,
- meta->map_ptr->value_size,
- false, NULL);
+ if (type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno, 0,
+ meta->map_ptr->value_size);
+ else
+ err = check_stack_boundary(env, regno,
+ meta->map_ptr->value_size,
+ false, NULL);
} else if (arg_type == ARG_CONST_STACK_SIZE ||
arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
@@ -1025,11 +1043,18 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
return -EACCES;
}
- err = check_stack_boundary(env, regno - 1, reg->imm,
- zero_size_allowed, meta);
+ if (regs[regno - 1].type == PTR_TO_PACKET)
+ err = check_packet_access(env, regno - 1, 0, reg->imm);
+ else
+ err = check_stack_boundary(env, regno - 1, reg->imm,
+ zero_size_allowed, meta);
}
return err;
+err_type:
+ verbose("R%d type=%s expected=%s\n", regno,
+ reg_type_str[type], reg_type_str[expected_type]);
+ return -EACCES;
}
static int check_map_func_compatibility(struct bpf_map *map, int func_id)
@@ -1053,7 +1078,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
- if (func_id != BPF_FUNC_skb_under_cgroup)
+ if (func_id != BPF_FUNC_skb_under_cgroup &&
+ func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
default:
@@ -1075,6 +1101,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
+ case BPF_FUNC_current_task_under_cgroup:
case BPF_FUNC_skb_under_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
@@ -1108,10 +1135,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
return count > 1 ? -EINVAL : 0;
}
-static void clear_all_pkt_pointers(struct verifier_env *env)
+static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs, *reg;
+ struct bpf_verifier_state *state = &env->cur_state;
+ struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
@@ -1131,12 +1158,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env)
}
}
-static int check_call(struct verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id)
{
- struct verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
- struct reg_state *regs = state->regs;
- struct reg_state *reg;
+ struct bpf_reg_state *regs = state->regs;
+ struct bpf_reg_state *reg;
struct bpf_call_arg_meta meta;
bool changes_data;
int i, err;
@@ -1164,6 +1191,7 @@ static int check_call(struct verifier_env *env, int func_id)
changes_data = bpf_helper_changes_skb_data(fn->func);
memset(&meta, 0, sizeof(meta));
+ meta.pkt_access = fn->pkt_access;
/* We only support one arg being in raw mode at the moment, which
* is sufficient for the helper functions we have right now.
@@ -1214,6 +1242,7 @@ static int check_call(struct verifier_env *env, int func_id)
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+ regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
/* remember map_ptr, so that check_map_access()
* can check 'value_size' boundary of memory access
* to map element returned from bpf_map_lookup_elem()
@@ -1238,12 +1267,13 @@ static int check_call(struct verifier_env *env, int func_id)
return 0;
}
-static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+static int check_packet_ptr_add(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
- struct reg_state *src_reg = &regs[insn->src_reg];
- struct reg_state tmp_reg;
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state tmp_reg;
s32 imm;
if (BPF_SRC(insn->code) == BPF_K) {
@@ -1311,10 +1341,10 @@ add_imm:
return 0;
}
-static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
u8 opcode = BPF_OP(insn->code);
s64 imm_log2;
@@ -1324,7 +1354,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
*/
if (BPF_SRC(insn->code) == BPF_X) {
- struct reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
dst_reg->imm && opcode == BPF_ADD) {
@@ -1413,11 +1443,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
- struct reg_state *dst_reg = &regs[insn->dst_reg];
- struct reg_state *src_reg = &regs[insn->src_reg];
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+ struct bpf_reg_state *src_reg = &regs[insn->src_reg];
u8 opcode = BPF_OP(insn->code);
/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
@@ -1433,10 +1464,110 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}
+static void check_reg_overflow(struct bpf_reg_state *reg)
+{
+ if (reg->max_value > BPF_REGISTER_MAX_RANGE)
+ reg->max_value = BPF_REGISTER_MAX_RANGE;
+ if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE)
+ reg->min_value = BPF_REGISTER_MIN_RANGE;
+}
+
+static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
+ struct bpf_insn *insn)
+{
+ struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+ u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE;
+ bool min_set = false, max_set = false;
+ u8 opcode = BPF_OP(insn->code);
+
+ dst_reg = &regs[insn->dst_reg];
+ if (BPF_SRC(insn->code) == BPF_X) {
+ check_reg_overflow(&regs[insn->src_reg]);
+ min_val = regs[insn->src_reg].min_value;
+ max_val = regs[insn->src_reg].max_value;
+
+ /* If the source register is a random pointer then the
+ * min_value/max_value values represent the range of the known
+ * accesses into that value, not the actual min/max value of the
+ * register itself. In this case we have to reset the reg range
+ * values so we know it is not safe to look at.
+ */
+ if (regs[insn->src_reg].type != CONST_IMM &&
+ regs[insn->src_reg].type != UNKNOWN_VALUE) {
+ min_val = BPF_REGISTER_MIN_RANGE;
+ max_val = BPF_REGISTER_MAX_RANGE;
+ }
+ } else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
+ (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
+ min_val = max_val = insn->imm;
+ min_set = max_set = true;
+ }
+
+ /* We don't know anything about what was done to this register, mark it
+ * as unknown.
+ */
+ if (min_val == BPF_REGISTER_MIN_RANGE &&
+ max_val == BPF_REGISTER_MAX_RANGE) {
+ reset_reg_range_values(regs, insn->dst_reg);
+ return;
+ }
+
+ switch (opcode) {
+ case BPF_ADD:
+ dst_reg->min_value += min_val;
+ dst_reg->max_value += max_val;
+ break;
+ case BPF_SUB:
+ dst_reg->min_value -= min_val;
+ dst_reg->max_value -= max_val;
+ break;
+ case BPF_MUL:
+ dst_reg->min_value *= min_val;
+ dst_reg->max_value *= max_val;
+ break;
+ case BPF_AND:
+ /* & is special since it could end up with 0 bits set. */
+ dst_reg->min_value &= min_val;
+ dst_reg->max_value = max_val;
+ break;
+ case BPF_LSH:
+ /* Gotta have special overflow logic here, if we're shifting
+ * more than MAX_RANGE then just assume we have an invalid
+ * range.
+ */
+ if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
+ dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+ else
+ dst_reg->min_value <<= min_val;
+
+ if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
+ dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+ else
+ dst_reg->max_value <<= max_val;
+ break;
+ case BPF_RSH:
+ dst_reg->min_value >>= min_val;
+ dst_reg->max_value >>= max_val;
+ break;
+ case BPF_MOD:
+ /* % is special since it is an unsigned modulus, so the floor
+ * will always be 0.
+ */
+ dst_reg->min_value = 0;
+ dst_reg->max_value = max_val - 1;
+ break;
+ default:
+ reset_reg_range_values(regs, insn->dst_reg);
+ break;
+ }
+
+ check_reg_overflow(dst_reg);
+}
+
/* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
+static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs, *dst_reg;
+ struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1496,6 +1627,11 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
+ /* we are setting our register to something new, we need to
+ * reset its range values.
+ */
+ reset_reg_range_values(regs, insn->dst_reg);
+
if (BPF_SRC(insn->code) == BPF_X) {
if (BPF_CLASS(insn->code) == BPF_ALU64) {
/* case: R1 = R2
@@ -1517,6 +1653,8 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
*/
regs[insn->dst_reg].type = CONST_IMM;
regs[insn->dst_reg].imm = insn->imm;
+ regs[insn->dst_reg].max_value = insn->imm;
+ regs[insn->dst_reg].min_value = insn->imm;
}
} else if (opcode > BPF_END) {
@@ -1569,6 +1707,9 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
dst_reg = &regs[insn->dst_reg];
+ /* first we want to adjust our ranges. */
+ adjust_reg_min_max_vals(env, insn);
+
/* pattern match 'bpf_add Rx, imm' instruction */
if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
@@ -1603,28 +1744,58 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
return -EACCES;
}
- /* mark dest operand */
- mark_reg_unknown_value(regs, insn->dst_reg);
+ /* If we did pointer math on a map value then just set it to our
+ * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or
+ * loads to this register appropriately, otherwise just mark the
+ * register as unknown.
+ */
+ if (env->allow_ptr_leaks &&
+ (dst_reg->type == PTR_TO_MAP_VALUE ||
+ dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
+ dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
+ else
+ mark_reg_unknown_value(regs, insn->dst_reg);
}
return 0;
}
-static void find_good_pkt_pointers(struct verifier_env *env,
- struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+ struct bpf_reg_state *dst_reg)
{
- struct verifier_state *state = &env->cur_state;
- struct reg_state *regs = state->regs, *reg;
+ struct bpf_reg_state *regs = state->regs, *reg;
int i;
- /* r2 = r3;
- * r2 += 8
- * if (r2 > pkt_end) goto somewhere
- * r2 == dst_reg, pkt_end == src_reg,
- * r2=pkt(id=n,off=8,r=0)
- * r3=pkt(id=n,off=0,r=0)
- * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
- * so that range of bytes [r3, r3 + 8) is safe to access
+
+ /* LLVM can generate two kind of checks:
+ *
+ * Type 1:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (r2 > pkt_end) goto <handle exception>
+ * <access okay>
+ *
+ * Where:
+ * r2 == dst_reg, pkt_end == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Type 2:
+ *
+ * r2 = r3;
+ * r2 += 8;
+ * if (pkt_end >= r2) goto <access okay>
+ * <handle exception>
+ *
+ * Where:
+ * pkt_end == dst_reg, r2 == src_reg
+ * r2=pkt(id=n,off=8,r=0)
+ * r3=pkt(id=n,off=0,r=0)
+ *
+ * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+ * so that range of bytes [r3, r3 + 8) is safe to access.
*/
+
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
regs[i].range = dst_reg->off;
@@ -1638,11 +1809,109 @@ static void find_good_pkt_pointers(struct verifier_env *env,
}
}
-static int check_cond_jmp_op(struct verifier_env *env,
+/* Adjusts the register min/max values in the case that the dst_reg is the
+ * variable register that we are working on, and src_reg is a constant or we're
+ * simply doing a BPF_K check.
+ */
+static void reg_set_min_max(struct bpf_reg_state *true_reg,
+ struct bpf_reg_state *false_reg, u64 val,
+ u8 opcode)
+{
+ switch (opcode) {
+ case BPF_JEQ:
+ /* If this is false then we know nothing Jon Snow, but if it is
+ * true then we know for sure.
+ */
+ true_reg->max_value = true_reg->min_value = val;
+ break;
+ case BPF_JNE:
+ /* If this is true we know nothing Jon Snow, but if it is false
+ * we know the value for sure;
+ */
+ false_reg->max_value = false_reg->min_value = val;
+ break;
+ case BPF_JGT:
+ /* Unsigned comparison, the minimum value is 0. */
+ false_reg->min_value = 0;
+ case BPF_JSGT:
+ /* If this is false then we know the maximum val is val,
+ * otherwise we know the min val is val+1.
+ */
+ false_reg->max_value = val;
+ true_reg->min_value = val + 1;
+ break;
+ case BPF_JGE:
+ /* Unsigned comparison, the minimum value is 0. */
+ false_reg->min_value = 0;
+ case BPF_JSGE:
+ /* If this is false then we know the maximum value is val - 1,
+ * otherwise we know the mimimum value is val.
+ */
+ false_reg->max_value = val - 1;
+ true_reg->min_value = val;
+ break;
+ default:
+ break;
+ }
+
+ check_reg_overflow(false_reg);
+ check_reg_overflow(true_reg);
+}
+
+/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
+ * is the variable reg.
+ */
+static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
+ struct bpf_reg_state *false_reg, u64 val,
+ u8 opcode)
+{
+ switch (opcode) {
+ case BPF_JEQ:
+ /* If this is false then we know nothing Jon Snow, but if it is
+ * true then we know for sure.
+ */
+ true_reg->max_value = true_reg->min_value = val;
+ break;
+ case BPF_JNE:
+ /* If this is true we know nothing Jon Snow, but if it is false
+ * we know the value for sure;
+ */
+ false_reg->max_value = false_reg->min_value = val;
+ break;
+ case BPF_JGT:
+ /* Unsigned comparison, the minimum value is 0. */
+ true_reg->min_value = 0;
+ case BPF_JSGT:
+ /*
+ * If this is false, then the val is <= the register, if it is
+ * true the register <= to the val.
+ */
+ false_reg->min_value = val;
+ true_reg->max_value = val - 1;
+ break;
+ case BPF_JGE:
+ /* Unsigned comparison, the minimum value is 0. */
+ true_reg->min_value = 0;
+ case BPF_JSGE:
+ /* If this is false then constant < register, if it is true then
+ * the register < constant.
+ */
+ false_reg->min_value = val + 1;
+ true_reg->max_value = val;
+ break;
+ default:
+ break;
+ }
+
+ check_reg_overflow(false_reg);
+ check_reg_overflow(true_reg);
+}
+
+static int check_cond_jmp_op(struct bpf_verifier_env *env,
struct bpf_insn *insn, int *insn_idx)
{
- struct reg_state *regs = env->cur_state.regs, *dst_reg;
- struct verifier_state *other_branch;
+ struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+ struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
u8 opcode = BPF_OP(insn->code);
int err;
@@ -1704,7 +1973,24 @@ static int check_cond_jmp_op(struct verifier_env *env,
if (!other_branch)
return -EFAULT;
- /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+ /* detect if we are comparing against a constant value so we can adjust
+ * our min/max values for our dst register.
+ */
+ if (BPF_SRC(insn->code) == BPF_X) {
+ if (regs[insn->src_reg].type == CONST_IMM)
+ reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ dst_reg, regs[insn->src_reg].imm,
+ opcode);
+ else if (dst_reg->type == CONST_IMM)
+ reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+ &regs[insn->src_reg], dst_reg->imm,
+ opcode);
+ } else {
+ reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ dst_reg, insn->imm, opcode);
+ }
+
+ /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
if (BPF_SRC(insn->code) == BPF_K &&
insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
@@ -1723,13 +2009,17 @@ static int check_cond_jmp_op(struct verifier_env *env,
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
- find_good_pkt_pointers(env, dst_reg);
+ find_good_pkt_pointers(this_branch, dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+ dst_reg->type == PTR_TO_PACKET_END &&
+ regs[insn->src_reg].type == PTR_TO_PACKET) {
+ find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
} else if (is_pointer_value(env, insn->dst_reg)) {
verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
return -EACCES;
}
if (log_level)
- print_verifier_state(&env->cur_state);
+ print_verifier_state(this_branch);
return 0;
}
@@ -1742,9 +2032,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
}
/* verify BPF_LD_IMM64 instruction */
-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
int err;
if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -1760,9 +2050,19 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
if (err)
return err;
- if (insn->src_reg == 0)
- /* generic move 64-bit immediate into a register */
+ if (insn->src_reg == 0) {
+ /* generic move 64-bit immediate into a register,
+ * only analyzer needs to collect the ld_imm value.
+ */
+ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+
+ if (!env->analyzer_ops)
+ return 0;
+
+ regs[insn->dst_reg].type = CONST_IMM;
+ regs[insn->dst_reg].imm = imm;
return 0;
+ }
/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
@@ -1799,11 +2099,11 @@ static bool may_access_skb(enum bpf_prog_type type)
* Output:
* R0 - 8/16/32-bit skb data converted to cpu endianness
*/
-static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
- struct reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *regs = env->cur_state.regs;
u8 mode = BPF_MODE(insn->code);
- struct reg_state *reg;
+ struct bpf_reg_state *reg;
int i, err;
if (!may_access_skb(env->prog->type)) {
@@ -1889,7 +2189,7 @@ enum {
BRANCH = 2,
};
-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
+#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
static int *insn_stack; /* stack of insns to process */
static int cur_stack; /* current stack index */
@@ -1900,7 +2200,7 @@ static int *insn_state;
* w - next instruction
* e - edge
*/
-static int push_insn(int t, int w, int e, struct verifier_env *env)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
{
if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
return 0;
@@ -1941,7 +2241,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env)
/* non-recursive depth-first-search to detect loops in BPF program
* loop == back-edge in directed graph
*/
-static int check_cfg(struct verifier_env *env)
+static int check_cfg(struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -2050,7 +2350,8 @@ err_free:
/* the following conditions reduce the number of explored insns
* from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
*/
-static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+ struct bpf_reg_state *cur)
{
if (old->id != cur->id)
return false;
@@ -2125,9 +2426,11 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
* whereas register type in current state is meaningful, it means that
* the current state will reach 'bpf_exit' instruction safely
*/
-static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
+static bool states_equal(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *old,
+ struct bpf_verifier_state *cur)
{
- struct reg_state *rold, *rcur;
+ struct bpf_reg_state *rold, *rcur;
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
@@ -2137,6 +2440,13 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
if (memcmp(rold, rcur, sizeof(*rold)) == 0)
continue;
+ /* If the ranges were not the same, but everything else was and
+ * we didn't do a variable access into a map then we are a-ok.
+ */
+ if (!env->varlen_map_value_access &&
+ rold->type == rcur->type && rold->imm == rcur->imm)
+ continue;
+
if (rold->type == NOT_INIT ||
(rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT))
continue;
@@ -2167,9 +2477,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
* the same, check that stored pointers types
* are the same as well.
* Ex: explored safe path could have stored
- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
* but current path has stored:
- * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+ * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
* such verifier states are not equivalent.
* return false to continue verification of this path
*/
@@ -2180,10 +2490,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
return true;
}
-static int is_state_visited(struct verifier_env *env, int insn_idx)
+static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
{
- struct verifier_state_list *new_sl;
- struct verifier_state_list *sl;
+ struct bpf_verifier_state_list *new_sl;
+ struct bpf_verifier_state_list *sl;
sl = env->explored_states[insn_idx];
if (!sl)
@@ -2193,7 +2503,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
return 0;
while (sl != STATE_LIST_MARK) {
- if (states_equal(&sl->state, &env->cur_state))
+ if (states_equal(env, &sl->state, &env->cur_state))
/* reached equivalent register/stack state,
* prune the search
*/
@@ -2207,7 +2517,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
* it will be rejected. Since there are no loops, we won't be
* seeing this 'insn_idx' instruction again on the way to bpf_exit
*/
- new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
+ new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
if (!new_sl)
return -ENOMEM;
@@ -2218,11 +2528,20 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
return 0;
}
-static int do_check(struct verifier_env *env)
+static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
+ int insn_idx, int prev_insn_idx)
+{
+ if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
+ return 0;
+
+ return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
+}
+
+static int do_check(struct bpf_verifier_env *env)
{
- struct verifier_state *state = &env->cur_state;
+ struct bpf_verifier_state *state = &env->cur_state;
struct bpf_insn *insns = env->prog->insnsi;
- struct reg_state *regs = state->regs;
+ struct bpf_reg_state *regs = state->regs;
int insn_cnt = env->prog->len;
int insn_idx, prev_insn_idx = 0;
int insn_processed = 0;
@@ -2230,6 +2549,7 @@ static int do_check(struct verifier_env *env)
init_reg_state(regs);
insn_idx = 0;
+ env->varlen_map_value_access = false;
for (;;) {
struct bpf_insn *insn;
u8 class;
@@ -2276,13 +2596,17 @@ static int do_check(struct verifier_env *env)
print_bpf_insn(insn);
}
+ err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
+ if (err)
+ return err;
+
if (class == BPF_ALU || class == BPF_ALU64) {
err = check_alu_op(env, insn);
if (err)
return err;
} else if (class == BPF_LDX) {
- enum bpf_reg_type src_reg_type;
+ enum bpf_reg_type *prev_src_type, src_reg_type;
/* check for reserved fields is already done */
@@ -2306,21 +2630,25 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
- if (BPF_SIZE(insn->code) != BPF_W) {
+ reset_reg_range_values(regs, insn->dst_reg);
+ if (BPF_SIZE(insn->code) != BPF_W &&
+ BPF_SIZE(insn->code) != BPF_DW) {
insn_idx++;
continue;
}
- if (insn->imm == 0) {
+ prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_src_type == NOT_INIT) {
/* saw a valid insn
* dst_reg = *(u32 *)(src_reg + off)
- * use reserved 'imm' field to mark this insn
+ * save type to validate intersecting paths
*/
- insn->imm = src_reg_type;
+ *prev_src_type = src_reg_type;
- } else if (src_reg_type != insn->imm &&
+ } else if (src_reg_type != *prev_src_type &&
(src_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_src_type == PTR_TO_CTX)) {
/* ABuser program is trying to use the same insn
* dst_reg = *(u32*) (src_reg + off)
* with different pointer types:
@@ -2333,7 +2661,7 @@ static int do_check(struct verifier_env *env)
}
} else if (class == BPF_STX) {
- enum bpf_reg_type dst_reg_type;
+ enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
err = check_xadd(env, insn);
@@ -2361,11 +2689,13 @@ static int do_check(struct verifier_env *env)
if (err)
return err;
- if (insn->imm == 0) {
- insn->imm = dst_reg_type;
- } else if (dst_reg_type != insn->imm &&
+ prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+ if (*prev_dst_type == NOT_INIT) {
+ *prev_dst_type = dst_reg_type;
+ } else if (dst_reg_type != *prev_dst_type &&
(dst_reg_type == PTR_TO_CTX ||
- insn->imm == PTR_TO_CTX)) {
+ *prev_dst_type == PTR_TO_CTX)) {
verbose("same insn cannot be used with different pointers\n");
return -EINVAL;
}
@@ -2471,6 +2801,7 @@ process_bpf_exit:
verbose("invalid BPF_LD mode\n");
return -EINVAL;
}
+ reset_reg_range_values(regs, insn->dst_reg);
} else {
verbose("unknown insn class %d\n", class);
return -EINVAL;
@@ -2483,14 +2814,28 @@ process_bpf_exit:
return 0;
}
+static int check_map_prog_compatibility(struct bpf_map *map,
+ struct bpf_prog *prog)
+
+{
+ if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+ (map->map_type == BPF_MAP_TYPE_HASH ||
+ map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
+ (map->map_flags & BPF_F_NO_PREALLOC)) {
+ verbose("perf_event programs can only use preallocated hash map\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
/* look for pseudo eBPF instructions that access map FDs and
* replace them with actual map pointers
*/
-static int replace_map_fd_with_map_ptr(struct verifier_env *env)
+static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
- int i, j;
+ int i, j, err;
for (i = 0; i < insn_cnt; i++, insn++) {
if (BPF_CLASS(insn->code) == BPF_LDX &&
@@ -2534,6 +2879,12 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
return PTR_ERR(map);
}
+ err = check_map_prog_compatibility(map, env->prog);
+ if (err) {
+ fdput(f);
+ return err;
+ }
+
/* store map pointer inside BPF_LD_IMM64 instruction */
insn[0].imm = (u32) (unsigned long) map;
insn[1].imm = ((u64) (unsigned long) map) >> 32;
@@ -2577,7 +2928,7 @@ next_insn:
}
/* drop refcnt of maps used by the rejected program */
-static void release_maps(struct verifier_env *env)
+static void release_maps(struct bpf_verifier_env *env)
{
int i;
@@ -2586,7 +2937,7 @@ static void release_maps(struct verifier_env *env)
}
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
-static void convert_pseudo_ld_imm64(struct verifier_env *env)
+static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
{
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
@@ -2600,62 +2951,74 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
/* convert load instructions that access fields of 'struct __sk_buff'
* into sequence of instructions that access fields of 'struct sk_buff'
*/
-static int convert_ctx_accesses(struct verifier_env *env)
+static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
- struct bpf_insn *insn = env->prog->insnsi;
- int insn_cnt = env->prog->len;
- struct bpf_insn insn_buf[16];
+ const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+ const int insn_cnt = env->prog->len;
+ struct bpf_insn insn_buf[16], *insn;
struct bpf_prog *new_prog;
enum bpf_access_type type;
- int i;
+ int i, cnt, delta = 0;
- if (!env->prog->aux->ops->convert_ctx_access)
+ if (ops->gen_prologue) {
+ cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
+ env->prog);
+ if (cnt >= ARRAY_SIZE(insn_buf)) {
+ verbose("bpf verifier is misconfigured\n");
+ return -EINVAL;
+ } else if (cnt) {
+ new_prog = bpf_patch_insn_single(env->prog, 0,
+ insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+ env->prog = new_prog;
+ delta += cnt - 1;
+ }
+ }
+
+ if (!ops->convert_ctx_access)
return 0;
- for (i = 0; i < insn_cnt; i++, insn++) {
- u32 insn_delta, cnt;
+ insn = env->prog->insnsi + delta;
- if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
type = BPF_READ;
- else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+ else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW))
type = BPF_WRITE;
else
continue;
- if (insn->imm != PTR_TO_CTX) {
- /* clear internal mark */
- insn->imm = 0;
+ if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
continue;
- }
- cnt = env->prog->aux->ops->
- convert_ctx_access(type, insn->dst_reg, insn->src_reg,
- insn->off, insn_buf, env->prog);
+ cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
+ insn->off, insn_buf, env->prog);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
}
- new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt);
+ new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
+ cnt);
if (!new_prog)
return -ENOMEM;
- insn_delta = cnt - 1;
+ delta += cnt - 1;
/* keep walking new program and skip insns we just inserted */
env->prog = new_prog;
- insn = new_prog->insnsi + i + insn_delta;
-
- insn_cnt += insn_delta;
- i += insn_delta;
+ insn = new_prog->insnsi + i + delta;
}
return 0;
}
-static void free_states(struct verifier_env *env)
+static void free_states(struct bpf_verifier_env *env)
{
- struct verifier_state_list *sl, *sln;
+ struct bpf_verifier_state_list *sl, *sln;
int i;
if (!env->explored_states)
@@ -2678,19 +3041,24 @@ static void free_states(struct verifier_env *env)
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
char __user *log_ubuf = NULL;
- struct verifier_env *env;
+ struct bpf_verifier_env *env;
int ret = -EINVAL;
if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
return -E2BIG;
- /* 'struct verifier_env' can be global, but since it's not small,
+ /* 'struct bpf_verifier_env' can be global, but since it's not small,
* allocate/free it every time bpf_check() is called
*/
- env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
+ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
+ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+ (*prog)->len);
+ ret = -ENOMEM;
+ if (!env->insn_aux_data)
+ goto err_free_env;
env->prog = *prog;
/* grab the mutex to protect few globals used by verifier */
@@ -2709,12 +3077,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
/* log_* values have to be sane */
if (log_size < 128 || log_size > UINT_MAX >> 8 ||
log_level == 0 || log_ubuf == NULL)
- goto free_env;
+ goto err_unlock;
ret = -ENOMEM;
log_buf = vmalloc(log_size);
if (!log_buf)
- goto free_env;
+ goto err_unlock;
} else {
log_level = 0;
}
@@ -2724,7 +3092,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
goto skip_full_check;
env->explored_states = kcalloc(env->prog->len,
- sizeof(struct verifier_state_list *),
+ sizeof(struct bpf_verifier_state_list *),
GFP_USER);
ret = -ENOMEM;
if (!env->explored_states)
@@ -2783,14 +3151,67 @@ skip_full_check:
free_log_buf:
if (log_level)
vfree(log_buf);
-free_env:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
*/
release_maps(env);
*prog = env->prog;
+err_unlock:
+ mutex_unlock(&bpf_verifier_lock);
+ vfree(env->insn_aux_data);
+err_free_env:
kfree(env);
+ return ret;
+}
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+ void *priv)
+{
+ struct bpf_verifier_env *env;
+ int ret;
+
+ env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+ if (!env)
+ return -ENOMEM;
+
+ env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+ prog->len);
+ ret = -ENOMEM;
+ if (!env->insn_aux_data)
+ goto err_free_env;
+ env->prog = prog;
+ env->analyzer_ops = ops;
+ env->analyzer_priv = priv;
+
+ /* grab the mutex to protect few globals used by verifier */
+ mutex_lock(&bpf_verifier_lock);
+
+ log_level = 0;
+
+ env->explored_states = kcalloc(env->prog->len,
+ sizeof(struct bpf_verifier_state_list *),
+ GFP_KERNEL);
+ ret = -ENOMEM;
+ if (!env->explored_states)
+ goto skip_full_check;
+
+ ret = check_cfg(env);
+ if (ret < 0)
+ goto skip_full_check;
+
+ env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
+ ret = do_check(env);
+
+skip_full_check:
+ while (pop_stack(env, NULL) >= 0);
+ free_states(env);
+
mutex_unlock(&bpf_verifier_lock);
+ vfree(env->insn_aux_data);
+err_free_env:
+ kfree(env);
return ret;
}
+EXPORT_SYMBOL_GPL(bpf_analyzer);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7c0d263f6bc5..c6e47e97b33f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7079,7 +7079,7 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(&event->pending);
}
- event->overflow_handler(event, data, regs);
+ READ_ONCE(event->overflow_handler)(event, data, regs);
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
@@ -7694,11 +7694,83 @@ static void perf_event_free_filter(struct perf_event *event)
ftrace_profile_free_filter(event);
}
+#ifdef CONFIG_BPF_SYSCALL
+static void bpf_overflow_handler(struct perf_event *event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct bpf_perf_event_data_kern ctx = {
+ .data = data,
+ .regs = regs,
+ };
+ int ret = 0;
+
+ preempt_disable();
+ if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+ goto out;
+ rcu_read_lock();
+ ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+ rcu_read_unlock();
+out:
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
+ if (!ret)
+ return;
+
+ event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ struct bpf_prog *prog;
+
+ if (event->overflow_handler_context)
+ /* hw breakpoint or kernel counter */
+ return -EINVAL;
+
+ if (event->prog)
+ return -EEXIST;
+
+ prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ event->prog = prog;
+ event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+ WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+ return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+ struct bpf_prog *prog = event->prog;
+
+ if (!prog)
+ return;
+
+ WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+ event->prog = NULL;
+ bpf_prog_put(prog);
+}
+#else
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+ return -EOPNOTSUPP;
+}
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
bool is_kprobe, is_tracepoint;
struct bpf_prog *prog;
+ if (event->attr.type == PERF_TYPE_HARDWARE ||
+ event->attr.type == PERF_TYPE_SOFTWARE)
+ return perf_event_set_bpf_handler(event, prog_fd);
+
if (event->attr.type != PERF_TYPE_TRACEPOINT)
return -EINVAL;
@@ -7739,6 +7811,8 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
{
struct bpf_prog *prog;
+ perf_event_free_bpf_handler(event);
+
if (!event->tp_event)
return;
@@ -9055,6 +9129,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (!overflow_handler && parent_event) {
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+ if (overflow_handler == bpf_overflow_handler) {
+ struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto err_ns;
+ }
+ event->prog = prog;
+ event->orig_overflow_handler =
+ parent_event->orig_overflow_handler;
+ }
+#endif
}
if (overflow_handler) {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438fdb029..5dcb99281259 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
@@ -59,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
-static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
- void *dst = (void *) (long) r1;
- int ret, size = (int) r2;
- void *unsafe_ptr = (void *) (long) r3;
+ int ret;
ret = probe_kernel_read(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
@@ -81,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
.arg3_type = ARG_ANYTHING,
};
-static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+ u32, size)
{
- void *unsafe_ptr = (void *) (long) r1;
- void *src = (void *) (long) r2;
- int size = (int) r3;
-
/*
* Ensure we're in user context which is safe for the helper to
* run. This helper has no business in a kthread.
@@ -128,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
* limited trace_printk()
* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
*/
-static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+ u64, arg2, u64, arg3)
{
- char *fmt = (char *) (long) r1;
bool str_seen = false;
int mod[3] = {};
int fmt_cnt = 0;
@@ -176,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
switch (fmt_cnt) {
case 1:
- unsafe_addr = r3;
- r3 = (long) buf;
+ unsafe_addr = arg1;
+ arg1 = (long) buf;
break;
case 2:
- unsafe_addr = r4;
- r4 = (long) buf;
+ unsafe_addr = arg2;
+ arg2 = (long) buf;
break;
case 3:
- unsafe_addr = r5;
- r5 = (long) buf;
+ unsafe_addr = arg3;
+ arg3 = (long) buf;
break;
}
buf[0] = 0;
@@ -207,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
}
return __trace_printk(1/* fake ip will not be printed */, fmt,
- mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
- mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
- mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+ mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
+ mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
+ mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
}
static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -231,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
- struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
@@ -310,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
return 0;
}
-static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
+ u64, flags, void *, data, u64, size)
{
- struct pt_regs *regs = (struct pt_regs *)(long) r1;
- struct bpf_map *map = (struct bpf_map *)(long) r2;
- void *data = (void *)(long) r4;
struct perf_raw_record raw = {
.frag = {
.size = size,
@@ -365,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
return __bpf_perf_event_output(regs, map, flags, &raw);
}
-static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_task)
{
return (long) current;
}
@@ -376,6 +370,31 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
.ret_type = RET_INTEGER,
};
+BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct cgroup *cgrp;
+
+ if (unlikely(in_interrupt()))
+ return -EINVAL;
+ if (unlikely(idx >= array->map.max_entries))
+ return -E2BIG;
+
+ cgrp = READ_ONCE(array->ptrs[idx]);
+ if (unlikely(!cgrp))
+ return -EAGAIN;
+
+ return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+ .func = bpf_current_task_under_cgroup,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -407,6 +426,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_perf_event_read_proto;
case BPF_FUNC_probe_write_user:
return bpf_get_probe_write_proto();
+ case BPF_FUNC_current_task_under_cgroup:
+ return &bpf_current_task_under_cgroup_proto;
+ case BPF_FUNC_get_prandom_u32:
+ return &bpf_get_prandom_u32_proto;
default:
return NULL;
}
@@ -447,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = {
.type = BPF_PROG_TYPE_KPROBE,
};
-static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
+ u64, flags, void *, data, u64, size)
{
+ struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
/*
* r1 points to perf tracepoint buffer where first 8 bytes are hidden
* from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
- * from there and call the same bpf_perf_event_output() helper
+ * from there and call the same bpf_perf_event_output() helper inline.
*/
- u64 ctx = *(long *)(uintptr_t)r1;
-
- return bpf_perf_event_output(ctx, r2, index, r4, size);
+ return ____bpf_perf_event_output(regs, map, flags, data, size);
}
static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
@@ -470,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
.arg5_type = ARG_CONST_STACK_SIZE,
};
-static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
+ u64, flags)
{
- u64 ctx = *(long *)(uintptr_t)r1;
+ struct pt_regs *regs = *(struct pt_regs **)tp_buff;
- return bpf_get_stackid(ctx, r2, r3, r4, r5);
+ /*
+ * Same comment as in bpf_perf_event_output_tp(), only that this time
+ * the other helper's function body cannot be inlined due to being
+ * external, thus we need to call raw helper function.
+ */
+ return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+ flags, 0, 0);
}
static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
@@ -520,10 +551,69 @@ static struct bpf_prog_type_list tracepoint_tl = {
.type = BPF_PROG_TYPE_TRACEPOINT,
};
+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+ enum bpf_reg_type *reg_type)
+{
+ if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+ return false;
+ if (type != BPF_READ)
+ return false;
+ if (off % size != 0)
+ return false;
+ if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
+ if (size != sizeof(u64))
+ return false;
+ } else {
+ if (size != sizeof(long))
+ return false;
+ }
+ return true;
+}
+
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+ int src_reg, int ctx_off,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ switch (ctx_off) {
+ case offsetof(struct bpf_perf_event_data, sample_period):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ data), dst_reg, src_reg,
+ offsetof(struct bpf_perf_event_data_kern, data));
+ *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+ offsetof(struct perf_sample_data, period));
+ break;
+ default:
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+ regs), dst_reg, src_reg,
+ offsetof(struct bpf_perf_event_data_kern, regs));
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
+ break;
+ }
+
+ return insn - insn_buf;
+}
+
+static const struct bpf_verifier_ops perf_event_prog_ops = {
+ .get_func_proto = tp_prog_func_proto,
+ .is_valid_access = pe_prog_is_valid_access,
+ .convert_ctx_access = pe_prog_convert_ctx_access,
+};
+
+static struct bpf_prog_type_list perf_event_tl = {
+ .ops = &perf_event_prog_ops,
+ .type = BPF_PROG_TYPE_PERF_EVENT,
+};
+
static int __init register_kprobe_prog_ops(void)
{
bpf_register_prog_type(&kprobe_tl);
bpf_register_prog_type(&tracepoint_tl);
+ bpf_register_prog_type(&perf_event_tl);
return 0;
}
late_initcall(register_kprobe_prog_ops);