aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/core.c')
-rw-r--r--kernel/bpf/core.c273
1 files changed, 224 insertions, 49 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b1a3545d0ec8..f908b9356025 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -21,12 +21,14 @@
* Kris Katterjohn - Added many additional checks in bpf_check_classic()
*/
+#include <uapi/linux/btf.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/moduleloader.h>
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <linux/frame.h>
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
@@ -52,6 +54,7 @@
#define DST regs[insn->dst_reg]
#define SRC regs[insn->src_reg]
#define FP regs[BPF_REG_FP]
+#define AX regs[BPF_REG_AX]
#define ARG1 regs[BPF_REG_ARG1]
#define CTX regs[BPF_REG_CTX]
#define IMM insn->imm
@@ -103,6 +106,91 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc);
+int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
+{
+ if (!prog->aux->nr_linfo || !prog->jit_requested)
+ return 0;
+
+ prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo,
+ sizeof(*prog->aux->jited_linfo),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!prog->aux->jited_linfo)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void bpf_prog_free_jited_linfo(struct bpf_prog *prog)
+{
+ kfree(prog->aux->jited_linfo);
+ prog->aux->jited_linfo = NULL;
+}
+
+void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog)
+{
+ if (prog->aux->jited_linfo && !prog->aux->jited_linfo[0])
+ bpf_prog_free_jited_linfo(prog);
+}
+
+/* The jit engine is responsible to provide an array
+ * for insn_off to the jited_off mapping (insn_to_jit_off).
+ *
+ * The idx to this array is the insn_off. Hence, the insn_off
+ * here is relative to the prog itself instead of the main prog.
+ * This array has one entry for each xlated bpf insn.
+ *
+ * jited_off is the byte off to the last byte of the jited insn.
+ *
+ * Hence, with
+ * insn_start:
+ * The first bpf insn off of the prog. The insn off
+ * here is relative to the main prog.
+ * e.g. if prog is a subprog, insn_start > 0
+ * linfo_idx:
+ * The prog's idx to prog->aux->linfo and jited_linfo
+ *
+ * jited_linfo[linfo_idx] = prog->bpf_func
+ *
+ * For i > linfo_idx,
+ *
+ * jited_linfo[i] = prog->bpf_func +
+ * insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
+ */
+void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
+ const u32 *insn_to_jit_off)
+{
+ u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
+ const struct bpf_line_info *linfo;
+ void **jited_linfo;
+
+ if (!prog->aux->jited_linfo)
+ /* Userspace did not provide linfo */
+ return;
+
+ linfo_idx = prog->aux->linfo_idx;
+ linfo = &prog->aux->linfo[linfo_idx];
+ insn_start = linfo[0].insn_off;
+ insn_end = insn_start + prog->len;
+
+ jited_linfo = &prog->aux->jited_linfo[linfo_idx];
+ jited_linfo[0] = prog->bpf_func;
+
+ nr_linfo = prog->aux->nr_linfo - linfo_idx;
+
+ for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
+ /* The verifier ensures that linfo[i].insn_off is
+ * strictly increasing
+ */
+ jited_linfo[i] = prog->bpf_func +
+ insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
+}
+
+void bpf_prog_free_linfo(struct bpf_prog *prog)
+{
+ bpf_prog_free_jited_linfo(prog);
+ kvfree(prog->aux->linfo);
+}
+
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
@@ -292,6 +380,26 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta,
return ret;
}
+static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
+{
+ struct bpf_line_info *linfo;
+ u32 i, nr_linfo;
+
+ nr_linfo = prog->aux->nr_linfo;
+ if (!nr_linfo || !delta)
+ return;
+
+ linfo = prog->aux->linfo;
+
+ for (i = 0; i < nr_linfo; i++)
+ if (off < linfo[i].insn_off)
+ break;
+
+ /* Push all off < linfo[i].insn_off by delta */
+ for (; i < nr_linfo; i++)
+ linfo[i].insn_off += delta;
+}
+
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len)
{
@@ -347,6 +455,8 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
*/
BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false));
+ bpf_adj_linfo(prog_adj, off, insn_delta);
+
return prog_adj;
}
@@ -365,13 +475,11 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
}
#ifdef CONFIG_BPF_JIT
-# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000)
-
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
int bpf_jit_harden __read_mostly;
int bpf_jit_kallsyms __read_mostly;
-int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT;
+long bpf_jit_limit __read_mostly;
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
@@ -390,6 +498,8 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
const char *end = sym + KSYM_NAME_LEN;
+ const struct btf_type *type;
+ const char *func_name;
BUILD_BUG_ON(sizeof("bpf_prog_") +
sizeof(prog->tag) * 2 +
@@ -404,6 +514,16 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
+
+ /* prog->aux->name will be ignored if full btf name is available */
+ if (prog->aux->func_info_cnt) {
+ type = btf_type_by_id(prog->aux->btf,
+ prog->aux->func_info[prog->aux->func_idx].type_id);
+ func_name = btf_name_by_offset(prog->aux->btf, type->name_off);
+ snprintf(sym, (size_t)(end - sym), "_%s", func_name);
+ return;
+ }
+
if (prog->aux->name[0])
snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
else
@@ -580,16 +700,27 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
static atomic_long_t bpf_jit_current;
+/* Can be overridden by an arch's JIT compiler if it has a custom,
+ * dedicated BPF backend memory area, or if neither of the two
+ * below apply.
+ */
+u64 __weak bpf_jit_alloc_exec_limit(void)
+{
#if defined(MODULES_VADDR)
+ return MODULES_END - MODULES_VADDR;
+#else
+ return VMALLOC_END - VMALLOC_START;
+#endif
+}
+
static int __init bpf_jit_charge_init(void)
{
/* Only used as heuristic here to derive limit. */
- bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
- PAGE_SIZE), INT_MAX);
+ bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
+ PAGE_SIZE), LONG_MAX);
return 0;
}
pure_initcall(bpf_jit_charge_init);
-#endif
static int bpf_jit_charge_modmem(u32 pages)
{
@@ -609,6 +740,16 @@ static void bpf_jit_uncharge_modmem(u32 pages)
atomic_long_sub(pages, &bpf_jit_current);
}
+void *__weak bpf_jit_alloc_exec(unsigned long size)
+{
+ return module_alloc(size);
+}
+
+void __weak bpf_jit_free_exec(void *addr)
+{
+ module_memfree(addr);
+}
+
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
unsigned int alignment,
@@ -626,7 +767,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
if (bpf_jit_charge_modmem(pages))
return NULL;
- hdr = module_alloc(size);
+ hdr = bpf_jit_alloc_exec(size);
if (!hdr) {
bpf_jit_uncharge_modmem(pages);
return NULL;
@@ -650,7 +791,7 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
u32 pages = hdr->pages;
- module_memfree(hdr);
+ bpf_jit_free_exec(hdr);
bpf_jit_uncharge_modmem(pages);
}
@@ -717,6 +858,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG);
BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);
+ /* Constraints on AX register:
+ *
+ * AX register is inaccessible from user space. It is mapped in
+ * all JITs, and used here for constant blinding rewrites. It is
+ * typically "stateless" meaning its contents are only valid within
+ * the executed instruction, but not across several instructions.
+ * There are a few exceptions however which are further detailed
+ * below.
+ *
+ * Constant blinding is only used by JITs, not in the interpreter.
+ * The interpreter uses AX in some occasions as a local temporary
+ * register e.g. in DIV or MOD instructions.
+ *
+ * In restricted circumstances, the verifier can also use the AX
+ * register for rewrites as long as they do not interfere with
+ * the above cases!
+ */
+ if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
+ goto out;
+
if (from->imm == 0 &&
(from->code == (BPF_ALU | BPF_MOV | BPF_K) ||
from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
@@ -909,32 +1070,34 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
#define BPF_INSN_MAP(INSN_2, INSN_3) \
/* 32 bit ALU operations. */ \
/* Register based. */ \
- INSN_3(ALU, ADD, X), \
- INSN_3(ALU, SUB, X), \
- INSN_3(ALU, AND, X), \
- INSN_3(ALU, OR, X), \
- INSN_3(ALU, LSH, X), \
- INSN_3(ALU, RSH, X), \
- INSN_3(ALU, XOR, X), \
- INSN_3(ALU, MUL, X), \
- INSN_3(ALU, MOV, X), \
- INSN_3(ALU, DIV, X), \
- INSN_3(ALU, MOD, X), \
+ INSN_3(ALU, ADD, X), \
+ INSN_3(ALU, SUB, X), \
+ INSN_3(ALU, AND, X), \
+ INSN_3(ALU, OR, X), \
+ INSN_3(ALU, LSH, X), \
+ INSN_3(ALU, RSH, X), \
+ INSN_3(ALU, XOR, X), \
+ INSN_3(ALU, MUL, X), \
+ INSN_3(ALU, MOV, X), \
+ INSN_3(ALU, ARSH, X), \
+ INSN_3(ALU, DIV, X), \
+ INSN_3(ALU, MOD, X), \
INSN_2(ALU, NEG), \
INSN_3(ALU, END, TO_BE), \
INSN_3(ALU, END, TO_LE), \
/* Immediate based. */ \
- INSN_3(ALU, ADD, K), \
- INSN_3(ALU, SUB, K), \
- INSN_3(ALU, AND, K), \
- INSN_3(ALU, OR, K), \
- INSN_3(ALU, LSH, K), \
- INSN_3(ALU, RSH, K), \
- INSN_3(ALU, XOR, K), \
- INSN_3(ALU, MUL, K), \
- INSN_3(ALU, MOV, K), \
- INSN_3(ALU, DIV, K), \
- INSN_3(ALU, MOD, K), \
+ INSN_3(ALU, ADD, K), \
+ INSN_3(ALU, SUB, K), \
+ INSN_3(ALU, AND, K), \
+ INSN_3(ALU, OR, K), \
+ INSN_3(ALU, LSH, K), \
+ INSN_3(ALU, RSH, K), \
+ INSN_3(ALU, XOR, K), \
+ INSN_3(ALU, MUL, K), \
+ INSN_3(ALU, MOV, K), \
+ INSN_3(ALU, ARSH, K), \
+ INSN_3(ALU, DIV, K), \
+ INSN_3(ALU, MOD, K), \
/* 64 bit ALU operations. */ \
/* Register based. */ \
INSN_3(ALU64, ADD, X), \
@@ -1046,7 +1209,6 @@ bool bpf_opcode_in_insntable(u8 code)
*/
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
- u64 tmp;
#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
static const void *jumptable[256] = {
@@ -1113,6 +1275,12 @@ select_insn:
DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
insn++;
CONT;
+ ALU_ARSH_X:
+ DST = (u64) (u32) ((*(s32 *) &DST) >> SRC);
+ CONT;
+ ALU_ARSH_K:
+ DST = (u64) (u32) ((*(s32 *) &DST) >> IMM);
+ CONT;
ALU64_ARSH_X:
(*(s64 *) &DST) >>= SRC;
CONT;
@@ -1120,36 +1288,36 @@ select_insn:
(*(s64 *) &DST) >>= IMM;
CONT;
ALU64_MOD_X:
- div64_u64_rem(DST, SRC, &tmp);
- DST = tmp;
+ div64_u64_rem(DST, SRC, &AX);
+ DST = AX;
CONT;
ALU_MOD_X:
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) SRC);
+ AX = (u32) DST;
+ DST = do_div(AX, (u32) SRC);
CONT;
ALU64_MOD_K:
- div64_u64_rem(DST, IMM, &tmp);
- DST = tmp;
+ div64_u64_rem(DST, IMM, &AX);
+ DST = AX;
CONT;
ALU_MOD_K:
- tmp = (u32) DST;
- DST = do_div(tmp, (u32) IMM);
+ AX = (u32) DST;
+ DST = do_div(AX, (u32) IMM);
CONT;
ALU64_DIV_X:
DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
- tmp = (u32) DST;
- do_div(tmp, (u32) SRC);
- DST = (u32) tmp;
+ AX = (u32) DST;
+ do_div(AX, (u32) SRC);
+ DST = (u32) AX;
CONT;
ALU64_DIV_K:
DST = div64_u64(DST, IMM);
CONT;
ALU_DIV_K:
- tmp = (u32) DST;
- do_div(tmp, (u32) IMM);
- DST = (u32) tmp;
+ AX = (u32) DST;
+ do_div(AX, (u32) IMM);
+ DST = (u32) AX;
CONT;
ALU_END_TO_BE:
switch (IMM) {
@@ -1405,7 +1573,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
- u64 regs[MAX_BPF_REG]; \
+ u64 regs[MAX_BPF_EXT_REG]; \
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
ARG1 = (u64) (unsigned long) ctx; \
@@ -1418,7 +1586,7 @@ static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
const struct bpf_insn *insn) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
- u64 regs[MAX_BPF_REG]; \
+ u64 regs[MAX_BPF_EXT_REG]; \
\
FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
BPF_R1 = r1; \
@@ -1559,13 +1727,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
* be JITed, but falls back to the interpreter.
*/
if (!bpf_prog_is_dev_bound(fp->aux)) {
+ *err = bpf_prog_alloc_jited_linfo(fp);
+ if (*err)
+ return fp;
+
fp = bpf_int_jit_compile(fp);
-#ifdef CONFIG_BPF_JIT_ALWAYS_ON
if (!fp->jited) {
+ bpf_prog_free_jited_linfo(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
*err = -ENOTSUPP;
return fp;
- }
#endif
+ } else {
+ bpf_prog_free_unused_jited_linfo(fp);
+ }
} else {
*err = bpf_prog_offload_compile(fp);
if (*err)