diff options
Diffstat (limited to 'arch/arm64/net')
-rw-r--r-- | arch/arm64/net/bpf_jit.h | 16 | ||||
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 507 |
2 files changed, 404 insertions, 119 deletions
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index a6acb94ea3d6..23b1b34db088 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -59,10 +59,13 @@ AARCH64_INSN_LDST_##type##_REG_OFFSET) #define A64_STRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, STORE) #define A64_LDRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, LOAD) +#define A64_LDRSB(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 8, SIGNED_LOAD) #define A64_STRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, STORE) #define A64_LDRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, LOAD) +#define A64_LDRSH(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 16, SIGNED_LOAD) #define A64_STR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, STORE) #define A64_LDR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, LOAD) +#define A64_LDRSW(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 32, SIGNED_LOAD) #define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE) #define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD) @@ -73,10 +76,13 @@ AARCH64_INSN_LDST_##type##_IMM_OFFSET) #define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE) #define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD) +#define A64_LDRSBI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 8, SIGNED_LOAD) #define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE) #define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD) +#define A64_LDRSHI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 16, SIGNED_LOAD) #define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE) #define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD) +#define A64_LDRSWI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 32, SIGNED_LOAD) #define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) #define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) @@ -186,6 +192,11 @@ #define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) #define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) +/* Sign extend */ +#define A64_SXTB(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 7) +#define A64_SXTH(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 15) +#define A64_SXTW(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 31) + /* Move wide (immediate) */ #define A64_MOVEW(sf, Rd, imm16, shift, type) \ aarch64_insn_gen_movewide(Rd, imm16, shift, \ @@ -223,6 +234,7 @@ #define A64_DATA2(sf, Rd, Rn, Rm, type) aarch64_insn_gen_data2(Rd, Rn, Rm, \ A64_VARIANT(sf), AARCH64_INSN_DATA2_##type) #define A64_UDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, UDIV) +#define A64_SDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, SDIV) #define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV) #define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV) #define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV) @@ -281,4 +293,8 @@ /* DMB */ #define A64_DMB_ISH aarch64_insn_gen_dmb(AARCH64_INSN_MB_ISH) +/* ADR */ +#define A64_ADR(Rd, offset) \ + aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR) + #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 7ca8779ae34f..c5b461dda438 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -76,6 +76,7 @@ struct jit_ctx { int *offset; int exentry_idx; __le32 *image; + __le32 *ro_image; u32 stack_size; int fpb_offset; }; @@ -205,6 +206,14 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT); } +int bpf_arch_text_invalidate(void *dst, size_t len) +{ + if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len)) + return -EINVAL; + + return 0; +} + static inline int epilogue_offset(const struct jit_ctx *ctx) { int to = ctx->epilogue_offset; @@ -285,10 +294,11 @@ static bool is_lsi_offset(int offset, int scale) /* Tail call offset to jump into */ #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) -static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) +static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf, + bool is_exception_cb) { const struct bpf_prog *prog = ctx->prog; - const bool is_main_prog = prog->aux->func_idx == 0; + const bool is_main_prog = !bpf_is_subprog(prog); const u8 r6 = bpf2a64[BPF_REG_6]; const u8 r7 = bpf2a64[BPF_REG_7]; const u8 r8 = bpf2a64[BPF_REG_8]; @@ -322,24 +332,45 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ - emit_bti(A64_BTI_C, ctx); + /* bpf function may be invoked by 3 instruction types: + * 1. bl, attached via freplace to bpf prog via short jump + * 2. br, attached via freplace to bpf prog via long jump + * 3. blr, working as a function pointer, used by emit_call. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx); emit(A64_MOV(1, A64_R(9), A64_LR), ctx); emit(A64_NOP, ctx); - /* Sign lr */ - if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) - emit(A64_PACIASP, ctx); - - /* Save FP and LR registers to stay align with ARM64 AAPCS */ - emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); - emit(A64_MOV(1, A64_FP, A64_SP), ctx); - - /* Save callee-saved registers */ - emit(A64_PUSH(r6, r7, A64_SP), ctx); - emit(A64_PUSH(r8, r9, A64_SP), ctx); - emit(A64_PUSH(fp, tcc, A64_SP), ctx); - emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); + if (!is_exception_cb) { + /* Sign lr */ + if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) + emit(A64_PACIASP, ctx); + /* Save FP and LR registers to stay align with ARM64 AAPCS */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* Save callee-saved registers */ + emit(A64_PUSH(r6, r7, A64_SP), ctx); + emit(A64_PUSH(r8, r9, A64_SP), ctx); + emit(A64_PUSH(fp, tcc, A64_SP), ctx); + emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); + } else { + /* + * Exception callback receives FP of Main Program as third + * parameter + */ + emit(A64_MOV(1, A64_FP, A64_R(2)), ctx); + /* + * Main Program already pushed the frame record and the + * callee-saved registers. The exception callback will not push + * anything and re-use the main program's stack. + * + * 10 registers are on the stack + */ + emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx); + } /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); @@ -359,6 +390,20 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit_bti(A64_BTI_J, ctx); } + /* + * Program acting as exception boundary should save all ARM64 + * Callee-saved registers as the exception callback needs to recover + * all ARM64 Callee-saved registers in its epilogue. + */ + if (prog->aux->exception_boundary) { + /* + * As we are pushing two more registers, BPF_FP should be moved + * 16 bytes + */ + emit(A64_SUB_I(1, fp, fp, 16), ctx); + emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx); + } + emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); /* Stack must be multiples of 16B */ @@ -647,7 +692,7 @@ static void build_plt(struct jit_ctx *ctx) plt->target = (u64)&dummy_tramp; } -static void build_epilogue(struct jit_ctx *ctx) +static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb) { const u8 r0 = bpf2a64[BPF_REG_0]; const u8 r6 = bpf2a64[BPF_REG_6]; @@ -660,6 +705,15 @@ static void build_epilogue(struct jit_ctx *ctx) /* We're done with BPF stack */ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + /* + * Program acting as exception boundary pushes R23 and R24 in addition + * to BPF callee-saved registers. Exception callback uses the boundary + * program's stack frame, so recover these extra registers in the above + * two cases. + */ + if (ctx->prog->aux->exception_boundary || is_exception_cb) + emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx); + /* Restore x27 and x28 */ emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); /* Restore fs (x25) and x26 */ @@ -701,7 +755,8 @@ static int add_exception_handler(const struct bpf_insn *insn, struct jit_ctx *ctx, int dst_reg) { - off_t offset; + off_t ins_offset; + off_t fixup_offset; unsigned long pc; struct exception_table_entry *ex; @@ -709,7 +764,8 @@ static int add_exception_handler(const struct bpf_insn *insn, /* First pass */ return 0; - if (BPF_MODE(insn->code) != BPF_PROBE_MEM) + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && + BPF_MODE(insn->code) != BPF_PROBE_MEMSX) return 0; if (!ctx->prog->aux->extable || @@ -717,12 +773,17 @@ static int add_exception_handler(const struct bpf_insn *insn, return -EINVAL; ex = &ctx->prog->aux->extable[ctx->exentry_idx]; - pc = (unsigned long)&ctx->image[ctx->idx - 1]; + pc = (unsigned long)&ctx->ro_image[ctx->idx - 1]; - offset = pc - (long)&ex->insn; - if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + /* + * This is the relative offset of the instruction that may fault from + * the exception table itself. This will be written to the exception + * table and if this instruction faults, the destination register will + * be set to '0' and the execution will jump to the next instruction. + */ + ins_offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) return -ERANGE; - ex->insn = offset; /* * Since the extable follows the program, the fixup offset is always @@ -731,12 +792,25 @@ static int add_exception_handler(const struct bpf_insn *insn, * bits. We don't need to worry about buildtime or runtime sort * modifying the upper bits because the table is already sorted, and * isn't part of the main exception table. + * + * The fixup_offset is set to the next instruction from the instruction + * that may fault. The execution will jump to this after handling the + * fault. */ - offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); - if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset)) + fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); + if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) return -ERANGE; - ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | + /* + * The offsets above have been calculated using the RO buffer but we + * need to use the R/W buffer for writes. + * switch ex to rw buffer for writing. + */ + ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image); + + ex->insn = ins_offset; + + ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); ex->type = EX_TYPE_BPF; @@ -773,12 +847,26 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 dst_adj; int off_adj; int ret; + bool sign_extend; switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: - emit(A64_MOV(is64, dst, src), ctx); + switch (insn->off) { + case 0: + emit(A64_MOV(is64, dst, src), ctx); + break; + case 8: + emit(A64_SXTB(is64, dst, src), ctx); + break; + case 16: + emit(A64_SXTH(is64, dst, src), ctx); + break; + case 32: + emit(A64_SXTW(is64, dst, src), ctx); + break; + } break; /* dst = dst OP src */ case BPF_ALU | BPF_ADD | BPF_X: @@ -807,11 +895,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit(A64_UDIV(is64, dst, dst, src), ctx); + if (!off) + emit(A64_UDIV(is64, dst, dst, src), ctx); + else + emit(A64_SDIV(is64, dst, dst, src), ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - emit(A64_UDIV(is64, tmp, dst, src), ctx); + if (!off) + emit(A64_UDIV(is64, tmp, dst, src), ctx); + else + emit(A64_SDIV(is64, tmp, dst, src), ctx); emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); break; case BPF_ALU | BPF_LSH | BPF_X: @@ -834,11 +928,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, /* dst = BSWAP##imm(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: #ifdef CONFIG_CPU_BIG_ENDIAN - if (BPF_SRC(code) == BPF_FROM_BE) + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) goto emit_bswap_uxt; #else /* !CONFIG_CPU_BIG_ENDIAN */ - if (BPF_SRC(code) == BPF_FROM_LE) + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; #endif switch (imm) { @@ -937,12 +1032,18 @@ emit_bswap_uxt: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_UDIV(is64, dst, dst, tmp), ctx); + if (!off) + emit(A64_UDIV(is64, dst, dst, tmp), ctx); + else + emit(A64_SDIV(is64, dst, dst, tmp), ctx); break; case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: emit_a64_mov_i(is64, tmp2, imm, ctx); - emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); + if (!off) + emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); + else + emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); break; case BPF_ALU | BPF_LSH | BPF_K: @@ -960,7 +1061,11 @@ emit_bswap_uxt: /* JUMP off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2a64_offset(i, off, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + jmp_offset = bpf2a64_offset(i, off, ctx); + else + jmp_offset = bpf2a64_offset(i, imm, ctx); check_imm26(jmp_offset); emit(A64_B(jmp_offset), ctx); break; @@ -1116,7 +1221,7 @@ emit_cond_jmp: return 1; } - /* LDX: dst = *(size *)(src + off) */ + /* LDX: dst = (u64)*(unsigned size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: @@ -1125,6 +1230,13 @@ emit_cond_jmp: case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: + /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: if (ctx->fpb_offset > 0 && src == fp) { src_adj = fpb; off_adj = off + ctx->fpb_offset; @@ -1132,29 +1244,49 @@ emit_cond_jmp: src_adj = src; off_adj = off; } + sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX); switch (BPF_SIZE(code)) { case BPF_W: if (is_lsi_offset(off_adj, 2)) { - emit(A64_LDR32I(dst, src_adj, off_adj), ctx); + if (sign_extend) + emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_LDR32(dst, src, tmp), ctx); + if (sign_extend) + emit(A64_LDRSW(dst, src_adj, off_adj), ctx); + else + emit(A64_LDR32(dst, src, tmp), ctx); } break; case BPF_H: if (is_lsi_offset(off_adj, 1)) { - emit(A64_LDRHI(dst, src_adj, off_adj), ctx); + if (sign_extend) + emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_LDRH(dst, src, tmp), ctx); + if (sign_extend) + emit(A64_LDRSH(dst, src, tmp), ctx); + else + emit(A64_LDRH(dst, src, tmp), ctx); } break; case BPF_B: if (is_lsi_offset(off_adj, 0)) { - emit(A64_LDRBI(dst, src_adj, off_adj), ctx); + if (sign_extend) + emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_LDRB(dst, src, tmp), ctx); + if (sign_extend) + emit(A64_LDRSB(dst, src, tmp), ctx); + else + emit(A64_LDRB(dst, src, tmp), ctx); } break; case BPF_DW: @@ -1446,7 +1578,8 @@ static inline void bpf_flush_icache(void *start, void *end) struct arm64_jit_data { struct bpf_binary_header *header; - u8 *image; + u8 *ro_image; + struct bpf_binary_header *ro_header; struct jit_ctx ctx; }; @@ -1455,12 +1588,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; + struct bpf_binary_header *ro_header; struct arm64_jit_data *jit_data; bool was_classic = bpf_prog_was_classic(prog); bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; u8 *image_ptr; + u8 *ro_image_ptr; if (!prog->jit_requested) return orig_prog; @@ -1487,8 +1622,11 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } if (jit_data->ctx.offset) { ctx = jit_data->ctx; - image_ptr = jit_data->image; + ro_image_ptr = jit_data->ro_image; + ro_header = jit_data->ro_header; header = jit_data->header; + image_ptr = (void *)header + ((void *)ro_image_ptr + - (void *)ro_header); extra_pass = true; prog_size = sizeof(u32) * ctx.idx; goto skip_init_ctx; @@ -1496,7 +1634,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); + ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; goto out_off; @@ -1510,7 +1648,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * BPF line info needs ctx->offset[i] to be the offset of * instruction[i] in jited image, so build prologue first. */ - if (build_prologue(&ctx, was_classic)) { + if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) { prog = orig_prog; goto out_off; } @@ -1521,7 +1659,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } ctx.epilogue_offset = ctx.idx; - build_epilogue(&ctx); + build_epilogue(&ctx, prog->aux->exception_cb); build_plt(&ctx); extable_align = __alignof__(struct exception_table_entry); @@ -1533,63 +1671,81 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* also allocate space for plt target */ extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); image_size = extable_offset + extable_size; - header = bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) { + ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, + sizeof(u32), &header, &image_ptr, + jit_fill_hole); + if (!ro_header) { prog = orig_prog; goto out_off; } /* 2. Now, the actual pass. */ + /* + * Use the image(RW) for writing the JITed instructions. But also save + * the ro_image(RX) for calculating the offsets in the image. The RW + * image will be later copied to the RX image from where the program + * will run. The bpf_jit_binary_pack_finalize() will do this copy in the + * final step. + */ ctx.image = (__le32 *)image_ptr; + ctx.ro_image = (__le32 *)ro_image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + extable_offset; + prog->aux->extable = (void *)ro_image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; - build_prologue(&ctx, was_classic); + build_prologue(&ctx, was_classic, prog->aux->exception_cb); if (build_body(&ctx, extra_pass)) { - bpf_jit_binary_free(header); prog = orig_prog; - goto out_off; + goto out_free_hdr; } - build_epilogue(&ctx); + build_epilogue(&ctx, prog->aux->exception_cb); build_plt(&ctx); /* 3. Extra pass to validate JITed code. */ if (validate_ctx(&ctx)) { - bpf_jit_binary_free(header); prog = orig_prog; - goto out_off; + goto out_free_hdr; } /* And we're done. */ if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, prog_size, 2, ctx.image); - bpf_flush_icache(header, ctx.image + ctx.idx); - if (!prog->is_func || extra_pass) { if (extra_pass && ctx.idx != jit_data->ctx.idx) { pr_err_once("multi-func JIT bug %d != %d\n", ctx.idx, jit_data->ctx.idx); - bpf_jit_binary_free(header); prog->bpf_func = NULL; prog->jited = 0; prog->jited_len = 0; + goto out_free_hdr; + } + if (WARN_ON(bpf_jit_binary_pack_finalize(prog, ro_header, + header))) { + /* ro_header has been freed */ + ro_header = NULL; + prog = orig_prog; goto out_off; } - bpf_jit_binary_lock_ro(header); + /* + * The instructions have now been copied to the ROX region from + * where they will execute. Now the data cache has to be cleaned to + * the PoU and the I-cache has to be invalidated for the VAs. + */ + bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); } else { jit_data->ctx = ctx; - jit_data->image = image_ptr; + jit_data->ro_image = ro_image_ptr; jit_data->header = header; + jit_data->ro_header = ro_header; } - prog->bpf_func = (void *)ctx.image; + + prog->bpf_func = (void *)ctx.ro_image; prog->jited = 1; prog->jited_len = prog_size; @@ -1601,7 +1757,7 @@ skip_init_ctx: ctx.offset[i] *= AARCH64_INSN_SIZE; bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); out_off: - kfree(ctx.offset); + kvfree(ctx.offset); kfree(jit_data); prog->aux->jit_data = NULL; } @@ -1610,6 +1766,14 @@ out: bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); return prog; + +out_free_hdr: + if (header) { + bpf_arch_text_copy(&ro_header->size, &header->size, + sizeof(header->size)); + bpf_jit_binary_pack_free(ro_header, header); + } + goto out_off; } bool bpf_jit_supports_kfunc_call(void) @@ -1617,6 +1781,13 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +void *bpf_arch_text_copy(void *dst, void *src, size_t len) +{ + if (!aarch64_insn_copy(dst, src, len)) + return ERR_PTR(-EINVAL); + return dst; +} + u64 bpf_jit_alloc_exec_limit(void) { return VMALLOC_END - VMALLOC_START; @@ -1643,19 +1814,14 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, int args_off, int retval_off, int run_ctx_off, bool save_ret) { - u32 *branch; + __le32 *branch; u64 enter_prog; u64 exit_prog; struct bpf_prog *p = l->link.prog; int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); - if (p->aux->sleepable) { - enter_prog = (u64)__bpf_prog_enter_sleepable; - exit_prog = (u64)__bpf_prog_exit_sleepable; - } else { - enter_prog = (u64)__bpf_prog_enter; - exit_prog = (u64)__bpf_prog_exit; - } + enter_prog = (u64)bpf_trampoline_enter(p); + exit_prog = (u64)bpf_trampoline_exit(p); if (l->cookie == 0) { /* if cookie is zero, one instruction is enough to store it */ @@ -1698,7 +1864,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, if (ctx->image) { int offset = &ctx->image[ctx->idx] - branch; - *branch = A64_CBZ(1, A64_R(0), offset); + *branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset)); } /* arg1: prog */ @@ -1713,7 +1879,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, int args_off, int retval_off, int run_ctx_off, - u32 **branches) + __le32 **branches) { int i; @@ -1736,21 +1902,21 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, } } -static void save_args(struct jit_ctx *ctx, int args_off, int nargs) +static void save_args(struct jit_ctx *ctx, int args_off, int nregs) { int i; - for (i = 0; i < nargs; i++) { + for (i = 0; i < nregs; i++) { emit(A64_STR64I(i, A64_SP, args_off), ctx); args_off += 8; } } -static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) +static void restore_args(struct jit_ctx *ctx, int args_off, int nregs) { int i; - for (i = 0; i < nargs; i++) { + for (i = 0; i < nregs; i++) { emit(A64_LDR64I(i, A64_SP, args_off), ctx); args_off += 8; } @@ -1768,8 +1934,8 @@ static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) * */ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, - struct bpf_tramp_links *tlinks, void *orig_call, - int nargs, u32 flags) + struct bpf_tramp_links *tlinks, void *func_addr, + int nregs, u32 flags) { int i; int stack_size; @@ -1777,14 +1943,14 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, int regs_off; int retval_off; int args_off; - int nargs_off; + int nregs_off; int ip_off; int run_ctx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; bool save_ret; - u32 **branches = NULL; + __le32 **branches = NULL; /* trampoline stack layout: * [ parent ip ] @@ -1800,11 +1966,11 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or * BPF_TRAMP_F_RET_FENTRY_RET * - * [ argN ] + * [ arg reg N ] * [ ... ] - * SP + args_off [ arg1 ] + * SP + args_off [ arg reg 1 ] * - * SP + nargs_off [ args count ] + * SP + nregs_off [ arg regs count ] * * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * @@ -1821,13 +1987,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_IP_ARG) stack_size += 8; - nargs_off = stack_size; + nregs_off = stack_size; /* room for args count */ stack_size += 8; args_off = stack_size; /* room for args */ - stack_size += nargs * 8; + stack_size += nregs * 8; /* room for return value */ retval_off = stack_size; @@ -1866,16 +2032,16 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_IP_ARG) { /* save ip address of the traced function */ - emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx); + emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx); emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); } - /* save args count*/ - emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx); - emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx); + /* save arg regs count*/ + emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx); + emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx); - /* save args */ - save_args(ctx, args_off, nargs); + /* save arg regs */ + save_args(ctx, args_off, nregs); /* save callee saved registers */ emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); @@ -1892,7 +2058,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, flags & BPF_TRAMP_F_RET_FENTRY_RET); if (fmod_ret->nr_links) { - branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), + branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *), GFP_KERNEL); if (!branches) return -ENOMEM; @@ -1902,21 +2068,22 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, } if (flags & BPF_TRAMP_F_CALL_ORIG) { - restore_args(ctx, args_off, nargs); + restore_args(ctx, args_off, nregs); /* call original func */ emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); - emit(A64_BLR(A64_R(10)), ctx); + emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); + emit(A64_RET(A64_R(10)), ctx); /* store return value */ emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); /* reserve a nop for bpf_tramp_image_put */ - im->ip_after_call = ctx->image + ctx->idx; + im->ip_after_call = ctx->ro_image + ctx->idx; emit(A64_NOP, ctx); } /* update the branches saved in invoke_bpf_mod_ret with cbnz */ for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { int offset = &ctx->image[ctx->idx] - branches[i]; - *branches[i] = A64_CBNZ(1, A64_R(10), offset); + *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); } for (i = 0; i < fexit->nr_links; i++) @@ -1924,13 +2091,13 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, run_ctx_off, false); if (flags & BPF_TRAMP_F_CALL_ORIG) { - im->ip_epilogue = ctx->image + ctx->idx; + im->ip_epilogue = ctx->ro_image + ctx->idx; emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); emit_call((const u64)__bpf_tramp_exit, ctx); } if (flags & BPF_TRAMP_F_RESTORE_REGS) - restore_args(ctx, args_off, nargs); + restore_args(ctx, args_off, nregs); /* restore callee saved register x19 and x20 */ emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); @@ -1957,50 +2124,113 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, emit(A64_RET(A64_R(10)), ctx); } - if (ctx->image) - bpf_flush_icache(ctx->image, ctx->image + ctx->idx); - kfree(branches); return ctx->idx; } -int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, - void *image_end, const struct btf_func_model *m, - u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call) +static int btf_func_model_nregs(const struct btf_func_model *m) +{ + int nregs = m->nr_args; + int i; + + /* extra registers needed for struct argument */ + for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + /* The arg_size is at most 16 bytes, enforced by the verifier. */ + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + nregs += (m->arg_size[i] + 7) / 8 - 1; + } + + return nregs; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) { - int ret; - int nargs = m->nr_args; - int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; struct jit_ctx ctx = { .image = NULL, .idx = 0, }; + struct bpf_tramp_image im; + int nregs, ret; - /* the first 8 arguments are passed by registers */ - if (nargs > 8) + nregs = btf_func_model_nregs(m); + /* the first 8 registers are used for arguments */ + if (nregs > 8) return -ENOTSUPP; - ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags); if (ret < 0) return ret; - if (ret > max_insns) - return -EFBIG; + return ret < 0 ? ret : ret * AARCH64_INSN_SIZE; +} + +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, jit_fill_hole); +} - ctx.image = image; - ctx.idx = 0; +void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +void arch_protect_bpf_trampoline(void *image, unsigned int size) +{ +} + +void arch_unprotect_bpf_trampoline(void *image, unsigned int size) +{ +} - jit_fill_hole(image, (unsigned int)(image_end - image)); - ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, + void *ro_image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int ret, nregs; + void *image, *tmp; + u32 size = ro_image_end - ro_image; + + /* image doesn't need to be in module memory range, so we can + * use kvmalloc. + */ + image = kvmalloc(size, GFP_KERNEL); + if (!image) + return -ENOMEM; - if (ret > 0 && validate_code(&ctx) < 0) + struct jit_ctx ctx = { + .image = image, + .ro_image = ro_image, + .idx = 0, + }; + + nregs = btf_func_model_nregs(m); + /* the first 8 registers are used for arguments */ + if (nregs > 8) + return -ENOTSUPP; + + jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); + ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); + + if (ret > 0 && validate_code(&ctx) < 0) { ret = -EINVAL; + goto out; + } if (ret > 0) ret *= AARCH64_INSN_SIZE; + tmp = bpf_arch_text_copy(ro_image, image, size); + if (IS_ERR(tmp)) { + ret = PTR_ERR(tmp); + goto out; + } + + bpf_flush_icache(ro_image, ro_image + size); +out: + kvfree(image); return ret; } @@ -2218,3 +2448,42 @@ out: return ret; } + +bool bpf_jit_supports_ptr_xchg(void) +{ + return true; +} + +bool bpf_jit_supports_exceptions(void) +{ + /* We unwind through both kernel frames starting from within bpf_throw + * call and BPF frames. Therefore we require FP unwinder to be enabled + * to walk kernel frames and reach BPF frames in the stack trace. + * ARM64 kernel is aways compiled with CONFIG_FRAME_POINTER=y + */ + return true; +} + +void bpf_jit_free(struct bpf_prog *prog) +{ + if (prog->jited) { + struct arm64_jit_data *jit_data = prog->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), + * the program may not be finalized yet. Call finalize here + * before freeing it. + */ + if (jit_data) { + bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size, + sizeof(jit_data->header->size)); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(prog); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); + } + + bpf_prog_unlock_free(prog); +} |