diff options
Diffstat (limited to 'arch/arm/net')
-rw-r--r-- | arch/arm/net/bpf_jit_32.c | 127 | ||||
-rw-r--r-- | arch/arm/net/bpf_jit_32.h | 3 |
2 files changed, 77 insertions, 53 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index cc29869d12a3..6a1c9fca5260 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -36,6 +36,10 @@ * +-----+ * |RSVD | JIT scratchpad * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | ... | caller-saved registers + * +-----+ + * | ... | arguments passed on stack + * ARM_SP during call => +-----| * | | * | ... | Function call stack * | | @@ -63,6 +67,12 @@ * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. + * + * Some eBPF operations are implemented via a call to a helper function. + * Such calls are "invisible" in the eBPF code, so it is up to the calling + * program to preserve any caller-saved ARM registers during the call. The + * JIT emits code to push and pop those registers onto the stack, immediately + * above the callee stack frame. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ @@ -70,6 +80,8 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) +#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3) + enum { /* Stack layout - these are offsets from (top of stack - 4) */ BPF_R2_HI, @@ -151,7 +163,7 @@ static const s8 bpf2a32[][2] = { [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, /* Read only Frame Pointer to access Stack */ [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, - /* Temporary Register for internal BPF JIT, can be used + /* Temporary Register for BPF JIT, can be used * for constant blindings and others. */ [TMP_REG_1] = {ARM_R7, ARM_R6}, @@ -464,6 +476,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { + const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 @@ -495,11 +508,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, rm), ctx); } + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); + /* Call appropriate function */ emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); + /* Restore caller-saved registers from stack */ + emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx); + /* Save return value */ if (rd != ARM_R0) emit(ARM_MOV_R(rd, ARM_R0), ctx); @@ -693,22 +712,6 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, } } -/* ALU operation (32 bit) - * dst = dst (op) src - */ -static inline void emit_a32_alu_r(const s8 dst, const s8 src, - struct jit_ctx *ctx, const bool is64, - const bool hi, const u8 op) { - const s8 *tmp = bpf2a32[TMP_REG_1]; - s8 rn, rd; - - rn = arm_bpf_get_reg32(src, tmp[1], ctx); - rd = arm_bpf_get_reg32(dst, tmp[0], ctx); - /* ALU operation */ - emit_alu_r(rd, rn, is64, hi, op, ctx); - arm_bpf_put_reg32(dst, rd, ctx); -} - /* ALU operation (64 bit) */ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx, @@ -795,6 +798,9 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val, case BPF_RSH: emit(ARM_LSR_I(rd, rd, val), ctx); break; + case BPF_ARSH: + emit(ARM_ASR_I(rd, rd, val), ctx); + break; case BPF_NEG: emit(ARM_RSB_I(rd, rd, val), ctx); break; @@ -860,8 +866,8 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[], emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx); - _emit(ARM_COND_MI, ARM_B(0), ctx); - emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); + _emit(ARM_COND_PL, + ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx); emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx); arm_bpf_put_reg32(dst_lo, ARM_LR, ctx); @@ -929,7 +935,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do LSR operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for LSR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx); @@ -955,7 +965,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[], rd = arm_bpf_get_reg64(dst, tmp, ctx); /* Do ARSH operation */ - if (val < 32) { + if (val == 0) { + /* An immediate value of 0 encodes a shift amount of 32 + * for ASR. To shift by 0, don't do anything. + */ + } else if (val < 32) { emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx); emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx); emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx); @@ -992,21 +1006,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], arm_bpf_put_reg32(dst_hi, rd[0], ctx); } +static bool is_ldst_imm(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + case BPF_DW: + /* Need to make sure off+4 does not overflow. */ + off_max = 0xfff - 4; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s32 off_max; s8 rd; rd = arm_bpf_get_reg32(dst, tmp[1], ctx); - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; @@ -1035,18 +1063,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[], /* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const s8 dst[], const s8 src, - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *rd = is_stacked(dst_lo) ? tmp : dst; s8 rm = src; - s32 off_max; - - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; @@ -1161,7 +1183,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) /* tmp2[0] = array, tmp2[1] = index */ - /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) + /* + * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; * tail_call_cnt++; */ @@ -1170,7 +1193,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) tc = arm_bpf_get_reg64(tcc, tmp, ctx); emit(ARM_CMP_I(tc[0], hi), ctx); _emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx); - _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx); + _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx); emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx); emit(ARM_ADC_I(tc[0], tc[0], 0), ctx); arm_bpf_put_reg64(tcc, tmp, ctx); @@ -1392,7 +1415,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_X: - case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_X: @@ -1449,10 +1471,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_X: goto notyet; - /* dst = dst >> imm */ /* dst = dst << imm */ - case BPF_ALU | BPF_RSH | BPF_K: + /* dst = dst >> imm */ + /* dst = dst >> imm (signed) */ case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: if (unlikely(imm > 31)) return -EINVAL; if (imm) @@ -1582,6 +1606,9 @@ exit: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: @@ -1600,10 +1627,9 @@ exit: } emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code)); break; - /* STX XADD: lock *(u32 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_W: - /* STX XADD: lock *(u64 *)(dst + off) += src */ - case BPF_STX | BPF_XADD | BPF_DW: + /* Atomic ops */ + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: goto notyet; /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_W: @@ -1822,7 +1848,7 @@ static int build_body(struct jit_ctx *ctx) if (ctx->target == NULL) ctx->offsets[i] = ctx->idx; - /* If unsuccesfull, return with error code */ + /* If unsuccesful, return with error code */ if (ret) return ret; } @@ -1841,11 +1867,6 @@ static int validate_code(struct jit_ctx *ctx) return 0; } -void bpf_jit_compile(struct bpf_prog *prog) -{ - /* Nothing to do here. We support Internal BPF. */ -} - bool bpf_jit_needs_zext(void) { return true; @@ -1936,7 +1957,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) * for jit, although it can decrease the size of the image. * * As each arm instruction is of length 32bit, we are translating - * number of JITed intructions into the size required to store these + * number of JITed instructions into the size required to store these * JITed code. */ image_size = sizeof(u32) * ctx.idx; diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index fb67cbc589e0..e0b593a1498d 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -94,6 +94,9 @@ #define ARM_INST_LSR_I 0x01a00020 #define ARM_INST_LSR_R 0x01a00030 +#define ARM_INST_ASR_I 0x01a00040 +#define ARM_INST_ASR_R 0x01a00050 + #define ARM_INST_MOV_R 0x01a00000 #define ARM_INST_MOVS_R 0x01b00000 #define ARM_INST_MOV_I 0x03a00000 |