bpf, riscv: Optimize BPF_JSET BPF_K using andi on RV64

This patch optimizes BPF_JSET BPF_K by using a RISC-V andi instruction when the BPF immediate fits in 12 bits, instead of first loading the immediate to a temporary register. Examples of generated code with and without this optimization: BPF_JMP_IMM(BPF_JSET, R1, 2, 1) without optimization: 20: li t1,2 24: and t1,a0,t1 28: bnez t1,0x30 BPF_JMP_IMM(BPF_JSET, R1, 2, 1) with optimization: 20: andi t1,a0,2 24: bnez t1,0x2c BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) without optimization: 20: li t1,2 24: mv t2,a0 28: slli t2,t2,0x20 2c: srli t2,t2,0x20 30: slli t1,t1,0x20 34: srli t1,t1,0x20 38: and t1,t2,t1 3c: bnez t1,0x44 BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) with optimization: 20: andi t1,a0,2 24: bnez t1,0x2c In these examples, because the upper 32 bits of the sign-extended immediate are 0, BPF_JMP BPF_JSET and BPF_JMP32 BPF_JSET are equivalent and therefore the JIT produces identical code for them. Co-developed-by: Xi Wang <xi.wang@gmail.com> Signed-off-by: Xi Wang <xi.wang@gmail.com> Signed-off-by: Luke Nelson <luke.r.nels@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Björn Töpel <bjorn.topel@gmail.com> Acked-by: Björn Töpel <bjorn.topel@gmail.com> Link: https://lore.kernel.org/bpf/20200506000320.28965-5-luke.r.nels@gmail.com
author: Luke Nelson <lukenels@cs.washington.edu> 2020-05-05 17:03:20 -0700
committer: Daniel Borkmann <daniel@iogearbox.net> 2020-05-06 09:48:15 +0200
commit: 073ca6a0369e09c586a103e665f2dd67f1c71444 (patch)
tree: f1ca6237c59417e3626b2e1c583fb525683dd52e /arch/riscv/net/bpf_jit_comp64.c
parent: bpf, riscv: Optimize BPF_JMP BPF_K when imm == 0 on RV64 (diff)
download: wireguard-linux-073ca6a0369e09c586a103e665f2dd67f1c71444.tar.xz
wireguard-linux-073ca6a0369e09c586a103e665f2dd67f1c71444.zip
1 files changed, 19 insertions, 8 deletions
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index b07cef952019..6cfd164cbe88 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -792,8 +792,6 @@ out_be:
 	case BPF_JMP32 | BPF_JSGE | BPF_K:
 	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
-	case BPF_JMP | BPF_JSET | BPF_K:
-	case BPF_JMP32 | BPF_JSET | BPF_K:
 		rvoff = rv_offset(i, off, ctx);
 		s = ctx->ninsns;
 		if (imm) {
@@ -813,15 +811,28 @@ out_be:
 
 		/* Adjust for extra insns */
 		rvoff -= (e - s) << 2;
+		emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+		break;
 
-		if (BPF_OP(code) == BPF_JSET) {
-			/* Adjust for and */
-			rvoff -= 4;
-			emit(rv_and(rs, rd, rs), ctx);
-			emit_branch(BPF_JNE, rs, RV_REG_ZERO, rvoff, ctx);
+	case BPF_JMP | BPF_JSET | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		rvoff = rv_offset(i, off, ctx);
+		s = ctx->ninsns;
+		if (is_12b_int(imm)) {
+			emit(rv_andi(RV_REG_T1, rd, imm), ctx);
 		} else {
-			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+			emit_imm(RV_REG_T1, imm, ctx);
+			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
 		}
+		/* For jset32, we should clear the upper 32 bits of t1, but
+		 * sign-extension is sufficient here and saves one instruction,
+		 * as t1 is used only in comparison against zero.
+		 */
+		if (!is64 && imm < 0)
+			emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
+		e = ctx->ninsns;
+		rvoff -= (e - s) << 2;
+		emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
 		break;
 
 	/* function call */
author	Luke Nelson <lukenels@cs.washington.edu>	2020-05-05 17:03:20 -0700
committer	Daniel Borkmann <daniel@iogearbox.net>	2020-05-06 09:48:15 +0200
commit	073ca6a0369e09c586a103e665f2dd67f1c71444 (patch)
tree	f1ca6237c59417e3626b2e1c583fb525683dd52e /arch/riscv/net/bpf_jit_comp64.c
parent	bpf, riscv: Optimize BPF_JMP BPF_K when imm == 0 on RV64 (diff)
download	wireguard-linux-073ca6a0369e09c586a103e665f2dd67f1c71444.tar.xz wireguard-linux-073ca6a0369e09c586a103e665f2dd67f1c71444.zip