aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/net
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/net')
-rw-r--r--arch/arm/net/bpf_jit_32.c127
-rw-r--r--arch/arm/net/bpf_jit_32.h3
2 files changed, 77 insertions, 53 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index cc29869d12a3..6a1c9fca5260 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -36,6 +36,10 @@
* +-----+
* |RSVD | JIT scratchpad
* current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ * | ... | caller-saved registers
+ * +-----+
+ * | ... | arguments passed on stack
+ * ARM_SP during call => +-----|
* | |
* | ... | Function call stack
* | |
@@ -63,6 +67,12 @@
*
* When popping registers off the stack at the end of a BPF function, we
* reference them via the current ARM_FP register.
+ *
+ * Some eBPF operations are implemented via a call to a helper function.
+ * Such calls are "invisible" in the eBPF code, so it is up to the calling
+ * program to preserve any caller-saved ARM registers during the call. The
+ * JIT emits code to push and pop those registers onto the stack, immediately
+ * above the callee stack frame.
*/
#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \
@@ -70,6 +80,8 @@
#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC)
+#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3)
+
enum {
/* Stack layout - these are offsets from (top of stack - 4) */
BPF_R2_HI,
@@ -151,7 +163,7 @@ static const s8 bpf2a32[][2] = {
[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
/* Read only Frame Pointer to access Stack */
[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},
- /* Temporary Register for internal BPF JIT, can be used
+ /* Temporary Register for BPF JIT, can be used
* for constant blindings and others.
*/
[TMP_REG_1] = {ARM_R7, ARM_R6},
@@ -464,6 +476,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
+ const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1);
const s8 *tmp = bpf2a32[TMP_REG_1];
#if __LINUX_ARM_ARCH__ == 7
@@ -495,11 +508,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
emit(ARM_MOV_R(ARM_R0, rm), ctx);
}
+ /* Push caller-saved registers on stack */
+ emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx);
+
/* Call appropriate function */
emit_mov_i(ARM_IP, op == BPF_DIV ?
(u32)jit_udiv32 : (u32)jit_mod32, ctx);
emit_blx_r(ARM_IP, ctx);
+ /* Restore caller-saved registers from stack */
+ emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx);
+
/* Save return value */
if (rd != ARM_R0)
emit(ARM_MOV_R(rd, ARM_R0), ctx);
@@ -693,22 +712,6 @@ static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
}
}
-/* ALU operation (32 bit)
- * dst = dst (op) src
- */
-static inline void emit_a32_alu_r(const s8 dst, const s8 src,
- struct jit_ctx *ctx, const bool is64,
- const bool hi, const u8 op) {
- const s8 *tmp = bpf2a32[TMP_REG_1];
- s8 rn, rd;
-
- rn = arm_bpf_get_reg32(src, tmp[1], ctx);
- rd = arm_bpf_get_reg32(dst, tmp[0], ctx);
- /* ALU operation */
- emit_alu_r(rd, rn, is64, hi, op, ctx);
- arm_bpf_put_reg32(dst, rd, ctx);
-}
-
/* ALU operation (64 bit) */
static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
const s8 src[], struct jit_ctx *ctx,
@@ -795,6 +798,9 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val,
case BPF_RSH:
emit(ARM_LSR_I(rd, rd, val), ctx);
break;
+ case BPF_ARSH:
+ emit(ARM_ASR_I(rd, rd, val), ctx);
+ break;
case BPF_NEG:
emit(ARM_RSB_I(rd, rd, val), ctx);
break;
@@ -860,8 +866,8 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[],
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
- _emit(ARM_COND_MI, ARM_B(0), ctx);
- emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
+ _emit(ARM_COND_PL,
+ ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx);
arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
@@ -929,7 +935,11 @@ static inline void emit_a32_rsh_i64(const s8 dst[],
rd = arm_bpf_get_reg64(dst, tmp, ctx);
/* Do LSR operation */
- if (val < 32) {
+ if (val == 0) {
+ /* An immediate value of 0 encodes a shift amount of 32
+ * for LSR. To shift by 0, don't do anything.
+ */
+ } else if (val < 32) {
emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_LSR, val), ctx);
@@ -955,7 +965,11 @@ static inline void emit_a32_arsh_i64(const s8 dst[],
rd = arm_bpf_get_reg64(dst, tmp, ctx);
/* Do ARSH operation */
- if (val < 32) {
+ if (val == 0) {
+ /* An immediate value of 0 encodes a shift amount of 32
+ * for ASR. To shift by 0, don't do anything.
+ */
+ } else if (val < 32) {
emit(ARM_MOV_SI(tmp2[1], rd[1], SRTYPE_LSR, val), ctx);
emit(ARM_ORR_SI(rd[1], tmp2[1], rd[0], SRTYPE_ASL, 32 - val), ctx);
emit(ARM_MOV_SI(rd[0], rd[0], SRTYPE_ASR, val), ctx);
@@ -992,21 +1006,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[],
arm_bpf_put_reg32(dst_hi, rd[0], ctx);
}
+static bool is_ldst_imm(s16 off, const u8 size)
+{
+ s16 off_max = 0;
+
+ switch (size) {
+ case BPF_B:
+ case BPF_W:
+ off_max = 0xfff;
+ break;
+ case BPF_H:
+ off_max = 0xff;
+ break;
+ case BPF_DW:
+ /* Need to make sure off+4 does not overflow. */
+ off_max = 0xfff - 4;
+ break;
+ }
+ return -off_max <= off && off <= off_max;
+}
+
/* *(size *)(dst + off) = src */
static inline void emit_str_r(const s8 dst, const s8 src[],
- s32 off, struct jit_ctx *ctx, const u8 sz){
+ s16 off, struct jit_ctx *ctx, const u8 sz){
const s8 *tmp = bpf2a32[TMP_REG_1];
- s32 off_max;
s8 rd;
rd = arm_bpf_get_reg32(dst, tmp[1], ctx);
- if (sz == BPF_H)
- off_max = 0xff;
- else
- off_max = 0xfff;
-
- if (off < 0 || off > off_max) {
+ if (!is_ldst_imm(off, sz)) {
emit_a32_mov_i(tmp[0], off, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
rd = tmp[0];
@@ -1035,18 +1063,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[],
/* dst = *(size*)(src + off) */
static inline void emit_ldx_r(const s8 dst[], const s8 src,
- s32 off, struct jit_ctx *ctx, const u8 sz){
+ s16 off, struct jit_ctx *ctx, const u8 sz){
const s8 *tmp = bpf2a32[TMP_REG_1];
const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
s8 rm = src;
- s32 off_max;
-
- if (sz == BPF_H)
- off_max = 0xff;
- else
- off_max = 0xfff;
- if (off < 0 || off > off_max) {
+ if (!is_ldst_imm(off, sz)) {
emit_a32_mov_i(tmp[0], off, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
rm = tmp[0];
@@ -1161,7 +1183,8 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
/* tmp2[0] = array, tmp2[1] = index */
- /* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ /*
+ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
* goto out;
* tail_call_cnt++;
*/
@@ -1170,7 +1193,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
tc = arm_bpf_get_reg64(tcc, tmp, ctx);
emit(ARM_CMP_I(tc[0], hi), ctx);
_emit(ARM_COND_EQ, ARM_CMP_I(tc[1], lo), ctx);
- _emit(ARM_COND_HI, ARM_B(jmp_offset), ctx);
+ _emit(ARM_COND_CS, ARM_B(jmp_offset), ctx);
emit(ARM_ADDS_I(tc[1], tc[1], 1), ctx);
emit(ARM_ADC_I(tc[0], tc[0], 0), ctx);
arm_bpf_put_reg64(tcc, tmp, ctx);
@@ -1392,7 +1415,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU | BPF_RSH | BPF_X:
- case BPF_ALU | BPF_ARSH | BPF_K:
case BPF_ALU | BPF_ARSH | BPF_X:
case BPF_ALU64 | BPF_ADD | BPF_K:
case BPF_ALU64 | BPF_ADD | BPF_X:
@@ -1449,10 +1471,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_MOD | BPF_K:
case BPF_ALU64 | BPF_MOD | BPF_X:
goto notyet;
- /* dst = dst >> imm */
/* dst = dst << imm */
- case BPF_ALU | BPF_RSH | BPF_K:
+ /* dst = dst >> imm */
+ /* dst = dst >> imm (signed) */
case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_K:
if (unlikely(imm > 31))
return -EINVAL;
if (imm)
@@ -1582,6 +1606,9 @@ exit:
rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
break;
+ /* speculation barrier */
+ case BPF_ST | BPF_NOSPEC:
+ break;
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_W:
case BPF_ST | BPF_MEM | BPF_H:
@@ -1600,10 +1627,9 @@ exit:
}
emit_str_r(dst_lo, tmp2, off, ctx, BPF_SIZE(code));
break;
- /* STX XADD: lock *(u32 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_W:
- /* STX XADD: lock *(u64 *)(dst + off) += src */
- case BPF_STX | BPF_XADD | BPF_DW:
+ /* Atomic ops */
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
goto notyet;
/* STX: *(size *)(dst + off) = src */
case BPF_STX | BPF_MEM | BPF_W:
@@ -1822,7 +1848,7 @@ static int build_body(struct jit_ctx *ctx)
if (ctx->target == NULL)
ctx->offsets[i] = ctx->idx;
- /* If unsuccesfull, return with error code */
+ /* If unsuccesful, return with error code */
if (ret)
return ret;
}
@@ -1841,11 +1867,6 @@ static int validate_code(struct jit_ctx *ctx)
return 0;
}
-void bpf_jit_compile(struct bpf_prog *prog)
-{
- /* Nothing to do here. We support Internal BPF. */
-}
-
bool bpf_jit_needs_zext(void)
{
return true;
@@ -1936,7 +1957,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
* for jit, although it can decrease the size of the image.
*
* As each arm instruction is of length 32bit, we are translating
- * number of JITed intructions into the size required to store these
+ * number of JITed instructions into the size required to store these
* JITed code.
*/
image_size = sizeof(u32) * ctx.idx;
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index fb67cbc589e0..e0b593a1498d 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -94,6 +94,9 @@
#define ARM_INST_LSR_I 0x01a00020
#define ARM_INST_LSR_R 0x01a00030
+#define ARM_INST_ASR_I 0x01a00040
+#define ARM_INST_ASR_R 0x01a00050
+
#define ARM_INST_MOV_R 0x01a00000
#define ARM_INST_MOVS_R 0x01b00000
#define ARM_INST_MOV_I 0x03a00000