From 0224b2acea0f9e3908d33e27b2dcb4e04686e997 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Tue, 5 May 2020 17:03:17 -0700 Subject: bpf, riscv: Enable missing verifier_zext optimizations on RV64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 66d0d5a854a6 ("riscv: bpf: eliminate zero extension code-gen") added support for the verifier zero-extension optimization on RV64 and commit 46dd3d7d287b ("bpf, riscv: Enable zext optimization for more RV64G ALU ops") enabled it for more instruction cases. However, BPF_LSH BPF_X and BPF_{LSH,RSH,ARSH} BPF_K are still missing the optimization. This patch enables the zero-extension optimization for these remaining cases. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Reviewed-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20200506000320.28965-2-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit_comp64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/riscv/net/bpf_jit_comp64.c') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index d208a9fd6c52..e2636902a74e 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -515,7 +515,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_X: @@ -692,19 +692,19 @@ out_be: case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; -- cgit v1.2.3-59-g8ed1b From 21a099abb765c3754689e1f7ca4536fa560112d0 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Tue, 5 May 2020 17:03:18 -0700 Subject: bpf, riscv: Optimize FROM_LE using verifier_zext on RV64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two optimizations for BPF_ALU BPF_END BPF_FROM_LE in the RV64 BPF JIT. First, it enables the verifier zero-extension optimization to avoid zero extension when imm == 32. Second, it avoids generating code for imm == 64, since it is equivalent to a no-op. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Reviewed-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20200506000320.28965-3-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit_comp64.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch/riscv/net/bpf_jit_comp64.c') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index e2636902a74e..c3ce9a911b66 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -542,13 +542,21 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = BSWAP##imm(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: - { - int shift = 64 - imm; - - emit(rv_slli(rd, rd, shift), ctx); - emit(rv_srli(rd, rd, shift), ctx); + switch (imm) { + case 16: + emit(rv_slli(rd, rd, 48), ctx); + emit(rv_srli(rd, rd, 48), ctx); + break; + case 32: + if (!aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case 64: + /* Do nothing */ + break; + } break; - } + case BPF_ALU | BPF_END | BPF_FROM_BE: emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx); -- cgit v1.2.3-59-g8ed1b From ca349a6a104e58479defdc08ce56472a48f7cb81 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Tue, 5 May 2020 17:03:19 -0700 Subject: bpf, riscv: Optimize BPF_JMP BPF_K when imm == 0 on RV64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds an optimization to BPF_JMP (32- and 64-bit) BPF_K for when the BPF immediate is zero. When the immediate is zero, the code can directly use the RISC-V zero register instead of loading a zero immediate to a temporary register first. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Reviewed-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20200506000320.28965-4-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit_comp64.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/riscv/net/bpf_jit_comp64.c') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index c3ce9a911b66..b07cef952019 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -796,7 +796,13 @@ out_be: case BPF_JMP32 | BPF_JSET | BPF_K: rvoff = rv_offset(i, off, ctx); s = ctx->ninsns; - emit_imm(RV_REG_T1, imm, ctx); + if (imm) { + emit_imm(RV_REG_T1, imm, ctx); + rs = RV_REG_T1; + } else { + /* If imm is 0, simply use zero register. */ + rs = RV_REG_ZERO; + } if (!is64) { if (is_signed_bpf_cond(BPF_OP(code))) emit_sext_32_rd(&rd, ctx); @@ -811,11 +817,10 @@ out_be: if (BPF_OP(code) == BPF_JSET) { /* Adjust for and */ rvoff -= 4; - emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); - emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, - ctx); + emit(rv_and(rs, rd, rs), ctx); + emit_branch(BPF_JNE, rs, RV_REG_ZERO, rvoff, ctx); } else { - emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx); + emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); } break; -- cgit v1.2.3-59-g8ed1b From 073ca6a0369e09c586a103e665f2dd67f1c71444 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Tue, 5 May 2020 17:03:20 -0700 Subject: bpf, riscv: Optimize BPF_JSET BPF_K using andi on RV64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch optimizes BPF_JSET BPF_K by using a RISC-V andi instruction when the BPF immediate fits in 12 bits, instead of first loading the immediate to a temporary register. Examples of generated code with and without this optimization: BPF_JMP_IMM(BPF_JSET, R1, 2, 1) without optimization: 20: li t1,2 24: and t1,a0,t1 28: bnez t1,0x30 BPF_JMP_IMM(BPF_JSET, R1, 2, 1) with optimization: 20: andi t1,a0,2 24: bnez t1,0x2c BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) without optimization: 20: li t1,2 24: mv t2,a0 28: slli t2,t2,0x20 2c: srli t2,t2,0x20 30: slli t1,t1,0x20 34: srli t1,t1,0x20 38: and t1,t2,t1 3c: bnez t1,0x44 BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) with optimization: 20: andi t1,a0,2 24: bnez t1,0x2c In these examples, because the upper 32 bits of the sign-extended immediate are 0, BPF_JMP BPF_JSET and BPF_JMP32 BPF_JSET are equivalent and therefore the JIT produces identical code for them. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Reviewed-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20200506000320.28965-5-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit_comp64.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'arch/riscv/net/bpf_jit_comp64.c') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index b07cef952019..6cfd164cbe88 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -792,8 +792,6 @@ out_be: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: - case BPF_JMP | BPF_JSET | BPF_K: - case BPF_JMP32 | BPF_JSET | BPF_K: rvoff = rv_offset(i, off, ctx); s = ctx->ninsns; if (imm) { @@ -813,15 +811,28 @@ out_be: /* Adjust for extra insns */ rvoff -= (e - s) << 2; + emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); + break; - if (BPF_OP(code) == BPF_JSET) { - /* Adjust for and */ - rvoff -= 4; - emit(rv_and(rs, rd, rs), ctx); - emit_branch(BPF_JNE, rs, RV_REG_ZERO, rvoff, ctx); + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + rvoff = rv_offset(i, off, ctx); + s = ctx->ninsns; + if (is_12b_int(imm)) { + emit(rv_andi(RV_REG_T1, rd, imm), ctx); } else { - emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); + emit_imm(RV_REG_T1, imm, ctx); + emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx); } + /* For jset32, we should clear the upper 32 bits of t1, but + * sign-extension is sufficient here and saves one instruction, + * as t1 is used only in comparison against zero. + */ + if (!is64 && imm < 0) + emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx); + e = ctx->ninsns; + rvoff -= (e - s) << 2; + emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); break; /* function call */ -- cgit v1.2.3-59-g8ed1b