aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/net/bpf_jit_32.c106
-rw-r--r--arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi26
-rw-r--r--arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi32
-rw-r--r--arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi61
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi10
-rw-r--r--arch/arm64/net/bpf_jit_comp.c180
-rw-r--r--arch/mips/net/ebpf_jit.c130
-rw-r--r--arch/powerpc/net/Makefile2
-rw-r--r--arch/powerpc/net/bpf_jit64.h37
-rw-r--r--arch/powerpc/net/bpf_jit_asm64.S180
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c217
-rw-r--r--arch/s390/net/Makefile2
-rw-r--r--arch/s390/net/bpf_jit.S120
-rw-r--r--arch/s390/net/bpf_jit.h20
-rw-r--r--arch/s390/net/bpf_jit_comp.c127
-rw-r--r--arch/sparc/net/Makefile5
-rw-r--r--arch/sparc/net/bpf_jit_64.h29
-rw-r--r--arch/sparc/net/bpf_jit_asm_64.S162
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c80
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/include/asm/nospec-branch.h41
-rw-r--r--arch/x86/net/Makefile7
-rw-r--r--arch/x86/net/bpf_jit.S154
-rw-r--r--arch/x86/net/bpf_jit_comp.c144
-rw-r--r--arch/x86/net/bpf_jit_comp32.c2419
25 files changed, 2788 insertions, 1505 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index b5030e1a41d8..6e8b71613039 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -84,7 +84,7 @@
*
* 1. First argument is passed using the arm 32bit registers and rest of the
* arguments are passed on stack scratch space.
- * 2. First callee-saved arugument is mapped to arm 32 bit registers and rest
+ * 2. First callee-saved argument is mapped to arm 32 bit registers and rest
* arguments are mapped to scratch space on stack.
* 3. We need two 64 bit temp registers to do complex operations on eBPF
* registers.
@@ -234,18 +234,11 @@ static void jit_fill_hole(void *area, unsigned int size)
#define SCRATCH_SIZE 80
/* total stack size used in JITed code */
-#define _STACK_SIZE \
- (ctx->prog->aux->stack_depth + \
- + SCRATCH_SIZE + \
- + 4 /* extra for skb_copy_bits buffer */)
-
-#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+#define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE)
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
/* Get the offset of eBPF REGISTERs stored on scratch space. */
-#define STACK_VAR(off) (STACK_SIZE-off-4)
-
-/* Offset of skb_copy_bits buffer */
-#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
+#define STACK_VAR(off) (STACK_SIZE - off)
#if __LINUX_ARM_ARCH__ < 7
@@ -708,7 +701,7 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
}
/* dst = dst >> src */
-static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
+static inline void emit_a32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
bool sstk, struct jit_ctx *ctx) {
const u8 *tmp = bpf2a32[TMP_REG_1];
const u8 *tmp2 = bpf2a32[TMP_REG_2];
@@ -724,7 +717,7 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
}
- /* Do LSH operation */
+ /* Do RSH operation */
emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
@@ -774,7 +767,7 @@ static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
}
/* dst = dst >> val */
-static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk,
+static inline void emit_a32_rsh_i64(const u8 dst[], bool dstk,
const u32 val, struct jit_ctx *ctx) {
const u8 *tmp = bpf2a32[TMP_REG_1];
const u8 *tmp2 = bpf2a32[TMP_REG_2];
@@ -1199,8 +1192,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
s32 jmp_offset;
#define check_imm(bits, imm) do { \
- if ((((imm) > 0) && ((imm) >> (bits))) || \
- (((imm) < 0) && (~(imm) >> (bits)))) { \
+ if ((imm) >= (1 << ((bits) - 1)) || \
+ (imm) < -(1 << ((bits) - 1))) { \
pr_info("[%2d] imm=%d(0x%x) out of range\n", \
i, imm, imm); \
return -EINVAL; \
@@ -1330,7 +1323,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_RSH | BPF_K:
if (unlikely(imm > 63))
return -EINVAL;
- emit_a32_lsr_i64(dst, dstk, imm, ctx);
+ emit_a32_rsh_i64(dst, dstk, imm, ctx);
break;
/* dst = dst << src */
case BPF_ALU64 | BPF_LSH | BPF_X:
@@ -1338,7 +1331,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
/* dst = dst >> src */
case BPF_ALU64 | BPF_RSH | BPF_X:
- emit_a32_lsr_r64(dst, src, dstk, sstk, ctx);
+ emit_a32_rsh_r64(dst, src, dstk, sstk, ctx);
break;
/* dst = dst >> src (signed) */
case BPF_ALU64 | BPF_ARSH | BPF_X:
@@ -1452,83 +1445,6 @@ exit:
emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
break;
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
- case BPF_LD | BPF_ABS | BPF_W:
- case BPF_LD | BPF_ABS | BPF_H:
- case BPF_LD | BPF_ABS | BPF_B:
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
- case BPF_LD | BPF_IND | BPF_W:
- case BPF_LD | BPF_IND | BPF_H:
- case BPF_LD | BPF_IND | BPF_B:
- {
- const u8 r4 = bpf2a32[BPF_REG_6][1]; /* r4 = ptr to sk_buff */
- const u8 r0 = bpf2a32[BPF_REG_0][1]; /*r0: struct sk_buff *skb*/
- /* rtn value */
- const u8 r1 = bpf2a32[BPF_REG_0][0]; /* r1: int k */
- const u8 r2 = bpf2a32[BPF_REG_1][1]; /* r2: unsigned int size */
- const u8 r3 = bpf2a32[BPF_REG_1][0]; /* r3: void *buffer */
- const u8 r6 = bpf2a32[TMP_REG_1][1]; /* r6: void *(*func)(..) */
- int size;
-
- /* Setting up first argument */
- emit(ARM_MOV_R(r0, r4), ctx);
-
- /* Setting up second argument */
- emit_a32_mov_i(r1, imm, false, ctx);
- if (BPF_MODE(code) == BPF_IND)
- emit_a32_alu_r(r1, src_lo, false, sstk, ctx,
- false, false, BPF_ADD);
-
- /* Setting up third argument */
- switch (BPF_SIZE(code)) {
- case BPF_W:
- size = 4;
- break;
- case BPF_H:
- size = 2;
- break;
- case BPF_B:
- size = 1;
- break;
- default:
- return -EINVAL;
- }
- emit_a32_mov_i(r2, size, false, ctx);
-
- /* Setting up fourth argument */
- emit(ARM_ADD_I(r3, ARM_SP, imm8m(SKB_BUFFER)), ctx);
-
- /* Setting up function pointer to call */
- emit_a32_mov_i(r6, (unsigned int)bpf_load_pointer, false, ctx);
- emit_blx_r(r6, ctx);
-
- emit(ARM_EOR_R(r1, r1, r1), ctx);
- /* Check if return address is NULL or not.
- * if NULL then jump to epilogue
- * else continue to load the value from retn address
- */
- emit(ARM_CMP_I(r0, 0), ctx);
- jmp_offset = epilogue_offset(ctx);
- check_imm24(jmp_offset);
- _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
-
- /* Load value from the address */
- switch (BPF_SIZE(code)) {
- case BPF_W:
- emit(ARM_LDR_I(r0, r0, 0), ctx);
- emit_rev32(r0, r0, ctx);
- break;
- case BPF_H:
- emit(ARM_LDRH_I(r0, r0, 0), ctx);
- emit_rev16(r0, r0, ctx);
- break;
- case BPF_B:
- emit(ARM_LDRB_I(r0, r0, 0), ctx);
- /* No need to reverse */
- break;
- }
- break;
- }
/* ST: *(size *)(dst + off) = imm */
case BPF_ST | BPF_MEM | BPF_W:
case BPF_ST | BPF_MEM | BPF_H:
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi
index 24552f19b3fa..6a573875d45a 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c-pins.dtsi
@@ -36,4 +36,30 @@
drive-strength = <2>; /* 2 MA */
};
};
+
+ blsp1_uart1_default: blsp1_uart1_default {
+ mux {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ function = "blsp_uart2";
+ };
+
+ config {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ drive-strength = <16>;
+ bias-disable;
+ };
+ };
+
+ blsp1_uart1_sleep: blsp1_uart1_sleep {
+ mux {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ function = "gpio";
+ };
+
+ config {
+ pins = "gpio41", "gpio42", "gpio43", "gpio44";
+ drive-strength = <2>;
+ bias-disable;
+ };
+ };
};
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi
index 59b29ddfb6e9..6167af955659 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c-pmic-pins.dtsi
@@ -14,6 +14,28 @@
};
};
+ bt_en_gpios: bt_en_gpios {
+ pinconf {
+ pins = "gpio19";
+ function = PMIC_GPIO_FUNC_NORMAL;
+ output-low;
+ power-source = <PM8994_GPIO_S4>; // 1.8V
+ qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+ bias-pull-down;
+ };
+ };
+
+ wlan_en_gpios: wlan_en_gpios {
+ pinconf {
+ pins = "gpio8";
+ function = PMIC_GPIO_FUNC_NORMAL;
+ output-low;
+ power-source = <PM8994_GPIO_S4>; // 1.8V
+ qcom,drive-strength = <PMIC_GPIO_STRENGTH_LOW>;
+ bias-pull-down;
+ };
+ };
+
volume_up_gpio: pm8996_gpio2 {
pinconf {
pins = "gpio2";
@@ -26,6 +48,16 @@
};
};
+ divclk4_pin_a: divclk4 {
+ pinconf {
+ pins = "gpio18";
+ function = PMIC_GPIO_FUNC_FUNC2;
+
+ bias-disable;
+ power-source = <PM8994_GPIO_S4>;
+ };
+ };
+
usb3_vbus_det_gpio: pm8996_gpio22 {
pinconf {
pins = "gpio22";
diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
index 1c8f1b86472d..4b8bb026346e 100644
--- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
@@ -23,6 +23,7 @@
aliases {
serial0 = &blsp2_uart1;
serial1 = &blsp2_uart2;
+ serial2 = &blsp1_uart1;
i2c0 = &blsp1_i2c2;
i2c1 = &blsp2_i2c1;
i2c2 = &blsp2_i2c0;
@@ -34,7 +35,36 @@
stdout-path = "serial0:115200n8";
};
+ clocks {
+ divclk4: divclk4 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "divclk4";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&divclk4_pin_a>;
+ };
+ };
+
soc {
+ serial@7570000 {
+ label = "BT-UART";
+ status = "okay";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&blsp1_uart1_default>;
+ pinctrl-1 = <&blsp1_uart1_sleep>;
+
+ bluetooth {
+ compatible = "qcom,qca6174-bt";
+
+ /* bt_disable_n gpio */
+ enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>;
+
+ clocks = <&divclk4>;
+ };
+ };
+
serial@75b0000 {
label = "LS-UART1";
status = "okay";
@@ -139,9 +169,40 @@
pinctrl-0 = <&usb2_vbus_det_gpio>;
};
+ bt_en: bt-en-1-8v {
+ pinctrl-names = "default";
+ pinctrl-0 = <&bt_en_gpios>;
+ compatible = "regulator-fixed";
+ regulator-name = "bt-en-regulator";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ /* WLAN card specific delay */
+ startup-delay-us = <70000>;
+ enable-active-high;
+ };
+
+ wlan_en: wlan-en-1-8v {
+ pinctrl-names = "default";
+ pinctrl-0 = <&wlan_en_gpios>;
+ compatible = "regulator-fixed";
+ regulator-name = "wlan-en-regulator";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ gpio = <&pm8994_gpios 8 0>;
+
+ /* WLAN card specific delay */
+ startup-delay-us = <70000>;
+ enable-active-high;
+ };
+
agnoc@0 {
qcom,pcie@600000 {
+ status = "okay";
perst-gpio = <&msmgpio 35 GPIO_ACTIVE_LOW>;
+ vddpe-supply = <&wlan_en>;
+ vddpe1-supply = <&bt_en>;
};
qcom,pcie@608000 {
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 410ae787ebb4..f8e49d0b4681 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -419,6 +419,16 @@
#clock-cells = <1>;
};
+ blsp1_uart1: serial@7570000 {
+ compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+ reg = <0x07570000 0x1000>;
+ interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>,
+ <&gcc GCC_BLSP1_AHB_CLK>;
+ clock-names = "core", "iface";
+ status = "disabled";
+ };
+
blsp1_spi0: spi@7575000 {
compatible = "qcom,spi-qup-v2.2.1";
reg = <0x07575000 0x600>;
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a93350451e8e..a6fdaea07c63 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -21,7 +21,6 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/printk.h>
-#include <linux/skbuff.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
@@ -80,23 +79,66 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
ctx->idx++;
}
+static inline void emit_a64_mov_i(const int is64, const int reg,
+ const s32 val, struct jit_ctx *ctx)
+{
+ u16 hi = val >> 16;
+ u16 lo = val & 0xffff;
+
+ if (hi & 0x8000) {
+ if (hi == 0xffff) {
+ emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
+ } else {
+ emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
+ if (lo != 0xffff)
+ emit(A64_MOVK(is64, reg, lo, 0), ctx);
+ }
+ } else {
+ emit(A64_MOVZ(is64, reg, lo, 0), ctx);
+ if (hi)
+ emit(A64_MOVK(is64, reg, hi, 16), ctx);
+ }
+}
+
+static int i64_i16_blocks(const u64 val, bool inverse)
+{
+ return (((val >> 0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+ (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+ (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
+ (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
+}
+
static inline void emit_a64_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
- u64 tmp = val;
- int shift = 0;
-
- emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
- tmp >>= 16;
- shift += 16;
- while (tmp) {
- if (tmp & 0xffff)
- emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
- tmp >>= 16;
- shift += 16;
+ u64 nrm_tmp = val, rev_tmp = ~val;
+ bool inverse;
+ int shift;
+
+ if (!(nrm_tmp >> 32))
+ return emit_a64_mov_i(0, reg, (u32)val, ctx);
+
+ inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
+ shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
+ (fls64(nrm_tmp) - 1)), 16), 0);
+ if (inverse)
+ emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
+ else
+ emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+ shift -= 16;
+ while (shift >= 0) {
+ if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
+ emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
+ shift -= 16;
}
}
+/*
+ * This is an unoptimized 64 immediate emission used for BPF to BPF call
+ * addresses. It will always do a full 64 bit decomposition as otherwise
+ * more complexity in the last extra pass is required since we previously
+ * reserved 4 instructions for the address.
+ */
static inline void emit_addr_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
@@ -111,26 +153,6 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
}
}
-static inline void emit_a64_mov_i(const int is64, const int reg,
- const s32 val, struct jit_ctx *ctx)
-{
- u16 hi = val >> 16;
- u16 lo = val & 0xffff;
-
- if (hi & 0x8000) {
- if (hi == 0xffff) {
- emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
- } else {
- emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
- emit(A64_MOVK(is64, reg, lo, 0), ctx);
- }
- } else {
- emit(A64_MOVZ(is64, reg, lo, 0), ctx);
- if (hi)
- emit(A64_MOVK(is64, reg, hi, 16), ctx);
- }
-}
-
static inline int bpf2a64_offset(int bpf_to, int bpf_from,
const struct jit_ctx *ctx)
{
@@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET 7
-static int build_prologue(struct jit_ctx *ctx)
+static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
const struct bpf_prog *prog = ctx->prog;
const u8 r6 = bpf2a64[BPF_REG_6];
@@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
* | ... | BPF prog stack
* | |
* +-----+ <= (BPF_FP - prog->aux->stack_depth)
- * |RSVD | JIT scratchpad
+ * |RSVD | padding
* current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size)
* | |
* | ... | Function call stack
@@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx)
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
- /* Initialize tail_call_cnt */
- emit(A64_MOVZ(1, tcc, 0, 0), ctx);
+ if (!ebpf_from_cbpf) {
+ /* Initialize tail_call_cnt */
+ emit(A64_MOVZ(1, tcc, 0, 0), ctx);
- cur_offset = ctx->idx - idx0;
- if (cur_offset != PROLOGUE_OFFSET) {
- pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
- cur_offset, PROLOGUE_OFFSET);
- return -1;
+ cur_offset = ctx->idx - idx0;
+ if (cur_offset != PROLOGUE_OFFSET) {
+ pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
+ cur_offset, PROLOGUE_OFFSET);
+ return -1;
+ }
}
- /* 4 byte extra for skb_copy_bits buffer */
- ctx->stack_size = prog->aux->stack_depth + 4;
- ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+ ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
@@ -723,71 +745,6 @@ emit_cond_jmp:
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
break;
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
- case BPF_LD | BPF_ABS | BPF_W:
- case BPF_LD | BPF_ABS | BPF_H:
- case BPF_LD | BPF_ABS | BPF_B:
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
- case BPF_LD | BPF_IND | BPF_W:
- case BPF_LD | BPF_IND | BPF_H:
- case BPF_LD | BPF_IND | BPF_B:
- {
- const u8 r0 = bpf2a64[BPF_REG_0]; /* r0 = return value */
- const u8 r6 = bpf2a64[BPF_REG_6]; /* r6 = pointer to sk_buff */
- const u8 fp = bpf2a64[BPF_REG_FP];
- const u8 r1 = bpf2a64[BPF_REG_1]; /* r1: struct sk_buff *skb */
- const u8 r2 = bpf2a64[BPF_REG_2]; /* r2: int k */
- const u8 r3 = bpf2a64[BPF_REG_3]; /* r3: unsigned int size */
- const u8 r4 = bpf2a64[BPF_REG_4]; /* r4: void *buffer */
- const u8 r5 = bpf2a64[BPF_REG_5]; /* r5: void *(*func)(...) */
- int size;
-
- emit(A64_MOV(1, r1, r6), ctx);
- emit_a64_mov_i(0, r2, imm, ctx);
- if (BPF_MODE(code) == BPF_IND)
- emit(A64_ADD(0, r2, r2, src), ctx);
- switch (BPF_SIZE(code)) {
- case BPF_W:
- size = 4;
- break;
- case BPF_H:
- size = 2;
- break;
- case BPF_B:
- size = 1;
- break;
- default:
- return -EINVAL;
- }
- emit_a64_mov_i64(r3, size, ctx);
- emit(A64_SUB_I(1, r4, fp, ctx->stack_size), ctx);
- emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
- emit(A64_BLR(r5), ctx);
- emit(A64_MOV(1, r0, A64_R(0)), ctx);
-
- jmp_offset = epilogue_offset(ctx);
- check_imm19(jmp_offset);
- emit(A64_CBZ(1, r0, jmp_offset), ctx);
- emit(A64_MOV(1, r5, r0), ctx);
- switch (BPF_SIZE(code)) {
- case BPF_W:
- emit(A64_LDR32(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
- emit(A64_REV32(0, r0, r0), ctx);
-#endif
- break;
- case BPF_H:
- emit(A64_LDRH(r0, r5, A64_ZR), ctx);
-#ifndef CONFIG_CPU_BIG_ENDIAN
- emit(A64_REV16(0, r0, r0), ctx);
-#endif
- break;
- case BPF_B:
- emit(A64_LDRB(r0, r5, A64_ZR), ctx);
- break;
- }
- break;
- }
default:
pr_err_once("unknown opcode %02x\n", code);
return -EINVAL;
@@ -851,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
struct arm64_jit_data *jit_data;
+ bool was_classic = bpf_prog_was_classic(prog);
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
@@ -905,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}
- if (build_prologue(&ctx)) {
+ if (build_prologue(&ctx, was_classic)) {
prog = orig_prog;
goto out_off;
}
@@ -928,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
skip_init_ctx:
ctx.idx = 0;
- build_prologue(&ctx);
+ build_prologue(&ctx, was_classic);
if (build_body(&ctx)) {
bpf_jit_binary_free(header);
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 3e2798bfea4f..aeb7b1b0f202 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -95,7 +95,6 @@ enum reg_val_type {
* struct jit_ctx - JIT context
* @skf: The sk_filter
* @stack_size: eBPF stack size
- * @tmp_offset: eBPF $sp offset to 8-byte temporary memory
* @idx: Instruction index
* @flags: JIT flags
* @offsets: Instruction offsets
@@ -105,7 +104,6 @@ enum reg_val_type {
struct jit_ctx {
const struct bpf_prog *skf;
int stack_size;
- int tmp_offset;
u32 idx;
u32 flags;
u32 *offsets;
@@ -293,7 +291,6 @@ static int gen_int_prologue(struct jit_ctx *ctx)
locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
stack_adjust += locals_size;
- ctx->tmp_offset = locals_size;
ctx->stack_size = stack_adjust;
@@ -399,7 +396,6 @@ static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
emit_instr(ctx, lui, reg, upper >> 16);
emit_instr(ctx, addiu, reg, reg, lower);
}
-
}
static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
@@ -547,28 +543,6 @@ static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
return 0;
}
-static void * __must_check
-ool_skb_header_pointer(const struct sk_buff *skb, int offset,
- int len, void *buffer)
-{
- return skb_header_pointer(skb, offset, len, buffer);
-}
-
-static int size_to_len(const struct bpf_insn *insn)
-{
- switch (BPF_SIZE(insn->code)) {
- case BPF_B:
- return 1;
- case BPF_H:
- return 2;
- case BPF_W:
- return 4;
- case BPF_DW:
- return 8;
- }
- return 0;
-}
-
static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
{
if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {
@@ -1267,110 +1241,6 @@ jeq_common:
return -EINVAL;
break;
- case BPF_LD | BPF_B | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_DW | BPF_ABS:
- ctx->flags |= EBPF_SAVE_RA;
-
- gen_imm_to_reg(insn, MIPS_R_A1, ctx);
- emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
-
- if (insn->imm < 0) {
- emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper);
- } else {
- emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
- emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
- }
- goto ld_skb_common;
-
- case BPF_LD | BPF_B | BPF_IND:
- case BPF_LD | BPF_H | BPF_IND:
- case BPF_LD | BPF_W | BPF_IND:
- case BPF_LD | BPF_DW | BPF_IND:
- ctx->flags |= EBPF_SAVE_RA;
- src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp);
- if (src < 0)
- return src;
- ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
- if (ts == REG_32BIT_ZERO_EX) {
- /* sign extend */
- emit_instr(ctx, sll, MIPS_R_A1, src, 0);
- src = MIPS_R_A1;
- }
- if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
- emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm);
- } else {
- gen_imm_to_reg(insn, MIPS_R_AT, ctx);
- emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src);
- }
- /* truncate to 32-bit int */
- emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0);
- emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset);
- emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO);
-
- emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper);
- emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer);
- emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn));
- emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT);
-
-ld_skb_common:
- emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
- /* delay slot move */
- emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO);
-
- /* Check the error value */
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off)) {
- target = j_target(ctx, exit_idx);
- if (target == (unsigned int)-1)
- return -E2BIG;
-
- if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
- ctx->offsets[this_idx] |= OFFSETS_B_CONV;
- ctx->long_b_conversion = 1;
- }
- emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3);
- emit_instr(ctx, nop);
- emit_instr(ctx, j, target);
- emit_instr(ctx, nop);
- } else {
- emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off);
- emit_instr(ctx, nop);
- }
-
-#ifdef __BIG_ENDIAN
- need_swap = false;
-#else
- need_swap = true;
-#endif
- dst = MIPS_R_V0;
- switch (BPF_SIZE(insn->code)) {
- case BPF_B:
- emit_instr(ctx, lbu, dst, 0, MIPS_R_V0);
- break;
- case BPF_H:
- emit_instr(ctx, lhu, dst, 0, MIPS_R_V0);
- if (need_swap)
- emit_instr(ctx, wsbh, dst, dst);
- break;
- case BPF_W:
- emit_instr(ctx, lw, dst, 0, MIPS_R_V0);
- if (need_swap) {
- emit_instr(ctx, wsbh, dst, dst);
- emit_instr(ctx, rotr, dst, dst, 16);
- }
- break;
- case BPF_DW:
- emit_instr(ctx, ld, dst, 0, MIPS_R_V0);
- if (need_swap) {
- emit_instr(ctx, dsbh, dst, dst);
- emit_instr(ctx, dshd, dst, dst);
- }
- break;
- }
-
- break;
case BPF_ALU | BPF_END | BPF_FROM_BE:
case BPF_ALU | BPF_END | BPF_FROM_LE:
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 02d369ca6a53..809f019d3cba 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -3,7 +3,7 @@
# Arch-specific network modules
#
ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
else
obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 8bdef7ed28a8..3609be4692b3 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -20,7 +20,7 @@
* with our redzone usage.
*
* [ prev sp ] <-------------
- * [ nv gpr save area ] 8*8 |
+ * [ nv gpr save area ] 6*8 |
* [ tail_call_cnt ] 8 |
* [ local_tmp_var ] 8 |
* fp (r31) --> [ ebpf stack space ] upto 512 |
@@ -28,8 +28,8 @@
* sp (r1) ---> [ stack pointer ] --------------
*/
-/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */
-#define BPF_PPC_STACK_SAVE (8*8)
+/* for gpr non volatile registers BPG_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE (6*8)
/* for bpf JIT code internal usage */
#define BPF_PPC_STACK_LOCALS 16
/* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -39,10 +39,8 @@
#ifndef __ASSEMBLY__
/* BPF register usage */
-#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0)
-#define SKB_DATA_REG (MAX_BPF_JIT_REG + 1)
-#define TMP_REG_1 (MAX_BPF_JIT_REG + 2)
-#define TMP_REG_2 (MAX_BPF_JIT_REG + 3)
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
/* BPF to ppc register mappings */
static const int b2p[] = {
@@ -63,40 +61,23 @@ static const int b2p[] = {
[BPF_REG_FP] = 31,
/* eBPF jit internal registers */
[BPF_REG_AX] = 2,
- [SKB_HLEN_REG] = 25,
- [SKB_DATA_REG] = 26,
[TMP_REG_1] = 9,
[TMP_REG_2] = 10
};
-/* PPC NVR range -- update this if we ever use NVRs below r24 */
-#define BPF_PPC_NVR_MIN 24
-
-/* Assembly helpers */
-#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \
- u64 func##_negative_offset(u64 r3, u64 r4); \
- u64 func##_positive_offset(u64 r3, u64 r4);
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-
-#define CHOOSE_LOAD_FUNC(imm, func) \
- (imm < 0 ? \
- (imm >= SKF_LL_OFF ? func##_negative_offset : func) : \
- func##_positive_offset)
+/* PPC NVR range -- update this if we ever use NVRs below r27 */
+#define BPF_PPC_NVR_MIN 27
#define SEEN_FUNC 0x1000 /* might call external helpers */
#define SEEN_STACK 0x2000 /* uses BPF stack */
-#define SEEN_SKB 0x4000 /* uses sk_buff */
-#define SEEN_TAILCALL 0x8000 /* uses tail calls */
+#define SEEN_TAILCALL 0x4000 /* uses tail calls */
struct codegen_context {
/*
* This is used to track register usage as well
* as calls to external helpers.
* - register usage is tracked with corresponding
- * bits (r3-r10 and r25-r31)
+ * bits (r3-r10 and r27-r31)
* - rest of the bits can be used to track other
* things -- for now, we use bits 16 to 23
* encoded in SEEN_* macros above
diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S
deleted file mode 100644
index 7e4c51430b84..000000000000
--- a/arch/powerpc/net/bpf_jit_asm64.S
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * bpf_jit_asm64.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2016, Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
- * IBM Corporation
- *
- * Based on bpf_jit_asm.S by Matt Evans
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/ptrace.h>
-#include "bpf_jit64.h"
-
-/*
- * All of these routines are called directly from generated code,
- * with the below register usage:
- * r27 skb pointer (ctx)
- * r25 skb header length
- * r26 skb->data pointer
- * r4 offset
- *
- * Result is passed back in:
- * r8 data read in host endian format (accumulator)
- *
- * r9 is used as a temporary register
- */
-
-#define r_skb r27
-#define r_hlen r25
-#define r_data r26
-#define r_off r4
-#define r_val r8
-#define r_tmp r9
-
-_GLOBAL_TOC(sk_load_word)
- cmpdi r_off, 0
- blt bpf_slow_path_word_neg
- b sk_load_word_positive_offset
-
-_GLOBAL_TOC(sk_load_word_positive_offset)
- /* Are we accessing past headlen? */
- subi r_tmp, r_hlen, 4
- cmpd r_tmp, r_off
- blt bpf_slow_path_word
- /* Nope, just hitting the header. cr0 here is eq or gt! */
- LWZX_BE r_val, r_data, r_off
- blr /* Return success, cr0 != LT */
-
-_GLOBAL_TOC(sk_load_half)
- cmpdi r_off, 0
- blt bpf_slow_path_half_neg
- b sk_load_half_positive_offset
-
-_GLOBAL_TOC(sk_load_half_positive_offset)
- subi r_tmp, r_hlen, 2
- cmpd r_tmp, r_off
- blt bpf_slow_path_half
- LHZX_BE r_val, r_data, r_off
- blr
-
-_GLOBAL_TOC(sk_load_byte)
- cmpdi r_off, 0
- blt bpf_slow_path_byte_neg
- b sk_load_byte_positive_offset
-
-_GLOBAL_TOC(sk_load_byte_positive_offset)
- cmpd r_hlen, r_off
- ble bpf_slow_path_byte
- lbzx r_val, r_data, r_off
- blr
-
-/*
- * Call out to skb_copy_bits:
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE) \
- mflr r0; \
- std r0, PPC_LR_STKOFF(r1); \
- stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \
- mr r3, r_skb; \
- /* r4 = r_off as passed */ \
- addi r5, r1, STACK_FRAME_MIN_SIZE; \
- li r6, SIZE; \
- bl skb_copy_bits; \
- nop; \
- /* save r5 */ \
- addi r5, r1, STACK_FRAME_MIN_SIZE; \
- /* r3 = 0 on success */ \
- addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \
- ld r0, PPC_LR_STKOFF(r1); \
- mtlr r0; \
- cmpdi r3, 0; \
- blt bpf_error; /* cr0 = LT */
-
-bpf_slow_path_word:
- bpf_slow_path_common(4)
- /* Data value is on stack, and cr0 != LT */
- LWZX_BE r_val, 0, r5
- blr
-
-bpf_slow_path_half:
- bpf_slow_path_common(2)
- LHZX_BE r_val, 0, r5
- blr
-
-bpf_slow_path_byte:
- bpf_slow_path_common(1)
- lbzx r_val, 0, r5
- blr
-
-/*
- * Call out to bpf_internal_load_pointer_neg_helper
- */
-#define sk_negative_common(SIZE) \
- mflr r0; \
- std r0, PPC_LR_STKOFF(r1); \
- stdu r1, -STACK_FRAME_MIN_SIZE(r1); \
- mr r3, r_skb; \
- /* r4 = r_off, as passed */ \
- li r5, SIZE; \
- bl bpf_internal_load_pointer_neg_helper; \
- nop; \
- addi r1, r1, STACK_FRAME_MIN_SIZE; \
- ld r0, PPC_LR_STKOFF(r1); \
- mtlr r0; \
- /* R3 != 0 on success */ \
- cmpldi r3, 0; \
- beq bpf_error_slow; /* cr0 = EQ */
-
-bpf_slow_path_word_neg:
- lis r_tmp, -32 /* SKF_LL_OFF */
- cmpd r_off, r_tmp /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- b sk_load_word_negative_offset
-
-_GLOBAL_TOC(sk_load_word_negative_offset)
- sk_negative_common(4)
- LWZX_BE r_val, 0, r3
- blr
-
-bpf_slow_path_half_neg:
- lis r_tmp, -32 /* SKF_LL_OFF */
- cmpd r_off, r_tmp /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- b sk_load_half_negative_offset
-
-_GLOBAL_TOC(sk_load_half_negative_offset)
- sk_negative_common(2)
- LHZX_BE r_val, 0, r3
- blr
-
-bpf_slow_path_byte_neg:
- lis r_tmp, -32 /* SKF_LL_OFF */
- cmpd r_off, r_tmp /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- b sk_load_byte_negative_offset
-
-_GLOBAL_TOC(sk_load_byte_negative_offset)
- sk_negative_common(1)
- lbzx r_val, 0, r3
- blr
-
-bpf_error_slow:
- /* fabricate a cr0 = lt */
- li r_tmp, -1
- cmpdi r_tmp, 0
-bpf_error:
- /*
- * Entered with cr0 = lt
- * Generated code will 'blt epilogue', returning 0.
- */
- li r_val, 0
- blr
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0ef3d9580e98..f1c95779843b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
* [ prev sp ] <-------------
* [ ... ] |
* sp (r1) ---> [ stack pointer ] --------------
- * [ nv gpr save area ] 8*8
+ * [ nv gpr save area ] 6*8
* [ tail_call_cnt ] 8
* [ local_tmp_var ] 8
* [ unused red zone ] 208 bytes protected
@@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
BUG();
}
-static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
-{
- /*
- * Load skb->len and skb->data_len
- * r3 points to skb
- */
- PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
- PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
- /* header_len = len - data_len */
- PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
-
- /* skb->data pointer */
- PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
-}
-
static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
@@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
if (bpf_is_seen_register(ctx, i))
PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
- /*
- * Save additional non-volatile regs if we cache skb
- * Also, setup skb data
- */
- if (ctx->seen & SEEN_SKB) {
- PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
- bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
- PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
- bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
- bpf_jit_emit_skb_loads(image, ctx);
- }
-
/* Setup frame pointer to point to the bpf stack area */
if (bpf_is_seen_register(ctx, BPF_REG_FP))
PPC_ADDI(b2p[BPF_REG_FP], 1,
@@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
if (bpf_is_seen_register(ctx, i))
PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
- /* Restore non-volatile registers used for skb cache */
- if (ctx->seen & SEEN_SKB) {
- PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
- bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
- PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
- bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
- }
-
/* Tear down our stack frame */
if (bpf_has_stack_frame(ctx)) {
PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
@@ -202,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
{
+ unsigned int i, ctx_idx = ctx->idx;
+
+ /* Load function address into r12 */
+ PPC_LI64(12, func);
+
+ /* For bpf-to-bpf function calls, the callee's address is unknown
+ * until the last extra pass. As seen above, we use PPC_LI64() to
+ * load the callee's address, but this may optimize the number of
+ * instructions required based on the nature of the address.
+ *
+ * Since we don't want the number of instructions emitted to change,
+ * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+ * we always have a five-instruction sequence, which is the maximum
+ * that PPC_LI64() can emit.
+ */
+ for (i = ctx->idx - ctx_idx; i < 5; i++)
+ PPC_NOP();
+
#ifdef PPC64_ELF_ABI_v1
- /* func points to the function descriptor */
- PPC_LI64(b2p[TMP_REG_2], func);
- /* Load actual entry point from function descriptor */
- PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
- /* ... and move it to LR */
- PPC_MTLR(b2p[TMP_REG_1]);
/*
* Load TOC from function descriptor at offset 8.
* We can clobber r2 since we get called through a
* function pointer (so caller will save/restore r2)
* and since we don't use a TOC ourself.
*/
- PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
- /* We can clobber r12 */
- PPC_FUNC_ADDR(12, func);
- PPC_MTLR(12);
+ PPC_BPF_LL(2, 12, 8);
+ /* Load actual entry point from function descriptor */
+ PPC_BPF_LL(12, 12, 0);
#endif
+
+ PPC_MTLR(12);
PPC_BLRL();
}
@@ -291,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx,
- u32 *addrs)
+ u32 *addrs, bool extra_pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@@ -747,29 +724,30 @@ emit_clear:
break;
/*
- * Call kernel helper
+ * Call kernel helper or bpf function
*/
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
- func = (u8 *) __bpf_call_base + imm;
- /* Save skb pointer if we need to re-cache skb data */
- if ((ctx->seen & SEEN_SKB) &&
- bpf_helper_changes_pkt_data(func))
- PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
+ /* bpf function call */
+ if (insn[i].src_reg == BPF_PSEUDO_CALL)
+ if (!extra_pass)
+ func = NULL;
+ else if (fp->aux->func && off < fp->aux->func_cnt)
+ /* use the subprog id from the off
+ * field to lookup the callee address
+ */
+ func = (u8 *) fp->aux->func[off]->bpf_func;
+ else
+ return -EINVAL;
+ /* kernel helper call */
+ else
+ func = (u8 *) __bpf_call_base + imm;
bpf_jit_emit_func_call(image, ctx, (u64)func);
/* move return value from r3 to BPF_REG_0 */
PPC_MR(b2p[BPF_REG_0], 3);
-
- /* refresh skb cache */
- if ((ctx->seen & SEEN_SKB) &&
- bpf_helper_changes_pkt_data(func)) {
- /* reload skb pointer to r3 */
- PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
- bpf_jit_emit_skb_loads(image, ctx);
- }
break;
/*
@@ -887,65 +865,6 @@ cond_branch:
break;
/*
- * Loads from packet header/data
- * Assume 32-bit input value in imm and X (src_reg)
- */
-
- /* Absolute loads */
- case BPF_LD | BPF_W | BPF_ABS:
- func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
- goto common_load_abs;
- case BPF_LD | BPF_H | BPF_ABS:
- func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
- goto common_load_abs;
- case BPF_LD | BPF_B | BPF_ABS:
- func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
-common_load_abs:
- /*
- * Load from [imm]
- * Load into r4, which can just be passed onto
- * skb load helpers as the second parameter
- */
- PPC_LI32(4, imm);
- goto common_load;
-
- /* Indirect loads */
- case BPF_LD | BPF_W | BPF_IND:
- func = (u8 *)sk_load_word;
- goto common_load_ind;
- case BPF_LD | BPF_H | BPF_IND:
- func = (u8 *)sk_load_half;
- goto common_load_ind;
- case BPF_LD | BPF_B | BPF_IND:
- func = (u8 *)sk_load_byte;
-common_load_ind:
- /*
- * Load from [src_reg + imm]
- * Treat src_reg as a 32-bit value
- */
- PPC_EXTSW(4, src_reg);
- if (imm) {
- if (imm >= -32768 && imm < 32768)
- PPC_ADDI(4, 4, IMM_L(imm));
- else {
- PPC_LI32(b2p[TMP_REG_1], imm);
- PPC_ADD(4, 4, b2p[TMP_REG_1]);
- }
- }
-
-common_load:
- ctx->seen |= SEEN_SKB;
- ctx->seen |= SEEN_FUNC;
- bpf_jit_emit_func_call(image, ctx, (u64)func);
-
- /*
- * Helper returns 'lt' condition on error, and an
- * appropriate return value in BPF_REG_0
- */
- PPC_BCC(COND_LT, exit_addr);
- break;
-
- /*
* Tail call
*/
case BPF_JMP | BPF_TAIL_CALL:
@@ -971,6 +890,14 @@ common_load:
return 0;
}
+struct powerpc64_jit_data {
+ struct bpf_binary_header *header;
+ u32 *addrs;
+ u8 *image;
+ u32 proglen;
+ struct codegen_context ctx;
+};
+
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;
@@ -978,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
u8 *image = NULL;
u32 *code_base;
u32 *addrs;
+ struct powerpc64_jit_data *jit_data;
struct codegen_context cgctx;
int pass;
int flen;
@@ -985,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *org_fp = fp;
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
+ bool extra_pass = false;
if (!fp->jit_requested)
return org_fp;
@@ -998,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp_fp;
}
+ jit_data = fp->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ fp = org_fp;
+ goto out;
+ }
+ fp->aux->jit_data = jit_data;
+ }
+
flen = fp->len;
+ addrs = jit_data->addrs;
+ if (addrs) {
+ cgctx = jit_data->ctx;
+ image = jit_data->image;
+ bpf_hdr = jit_data->header;
+ proglen = jit_data->proglen;
+ alloclen = proglen + FUNCTION_DESCR_SIZE;
+ extra_pass = true;
+ goto skip_init_ctx;
+ }
+
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL) {
fp = org_fp;
- goto out;
+ goto out_addrs;
}
memset(&cgctx, 0, sizeof(struct codegen_context));
@@ -1011,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
- goto out;
+ goto out_addrs;
}
/*
@@ -1032,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_fill_ill_insns);
if (!bpf_hdr) {
fp = org_fp;
- goto out;
+ goto out_addrs;
}
+skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
/* Code generation passes 1-2 */
@@ -1042,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+ bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
@@ -1068,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+ if (!fp->is_func || extra_pass) {
+out_addrs:
+ kfree(addrs);
+ kfree(jit_data);
+ fp->aux->jit_data = NULL;
+ } else {
+ jit_data->addrs = addrs;
+ jit_data->ctx = cgctx;
+ jit_data->proglen = proglen;
+ jit_data->image = image;
+ jit_data->header = bpf_hdr;
+ }
out:
- kfree(addrs);
-
if (bpf_blinded)
bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile
index e2b85ffdbb0c..8cab6deb0403 100644
--- a/arch/s390/net/Makefile
+++ b/arch/s390/net/Makefile
@@ -2,5 +2,5 @@
#
# Arch-specific network modules
#
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
obj-$(CONFIG_HAVE_PNETID) += pnet.o
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
deleted file mode 100644
index 9f794869c1b0..000000000000
--- a/arch/s390/net/bpf_jit.S
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * BPF Jit compiler for s390, help functions.
- *
- * Copyright IBM Corp. 2012,2015
- *
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- * Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/nospec-insn.h>
-#include "bpf_jit.h"
-
-/*
- * Calling convention:
- * registers %r7-%r10, %r11,%r13, and %r15 are call saved
- *
- * Input (64 bit):
- * %r3 (%b2) = offset into skb data
- * %r6 (%b5) = return address
- * %r7 (%b6) = skb pointer
- * %r12 = skb data pointer
- *
- * Output:
- * %r14= %b0 = return value (read skb value)
- *
- * Work registers: %r2,%r4,%r5,%r14
- *
- * skb_copy_bits takes 4 parameters:
- * %r2 = skb pointer
- * %r3 = offset into skb data
- * %r4 = pointer to temp buffer
- * %r5 = length to copy
- * Return value in %r2: 0 = ok
- *
- * bpf_internal_load_pointer_neg_helper takes 3 parameters:
- * %r2 = skb pointer
- * %r3 = offset into data
- * %r4 = length to copy
- * Return value in %r2: Pointer to data
- */
-
-#define SKF_MAX_NEG_OFF -0x200000 /* SKF_LL_OFF from filter.h */
-
-/*
- * Load SIZE bytes from SKB
- */
-#define sk_load_common(NAME, SIZE, LOAD) \
-ENTRY(sk_load_##NAME); \
- ltgr %r3,%r3; /* Is offset negative? */ \
- jl sk_load_##NAME##_slow_neg; \
-ENTRY(sk_load_##NAME##_pos); \
- aghi %r3,SIZE; /* Offset + SIZE */ \
- clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \
- jh sk_load_##NAME##_slow; \
- LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \
- B_EX OFF_OK,%r6; /* Return */ \
- \
-sk_load_##NAME##_slow:; \
- lgr %r2,%r7; /* Arg1 = skb pointer */ \
- aghi %r3,-SIZE; /* Arg2 = offset */ \
- la %r4,STK_OFF_TMP(%r15); /* Arg3 = temp bufffer */ \
- lghi %r5,SIZE; /* Arg4 = size */ \
- brasl %r14,skb_copy_bits; /* Get data from skb */ \
- LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \
- ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \
- BR_EX %r6; /* Return */
-
-sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */
-sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */
-
- GEN_BR_THUNK %r6
- GEN_B_THUNK OFF_OK,%r6
-
-/*
- * Load 1 byte from SKB (optimized version)
- */
- /* r14 = *(u8 *) (skb->data+offset) */
-ENTRY(sk_load_byte)
- ltgr %r3,%r3 # Is offset negative?
- jl sk_load_byte_slow_neg
-ENTRY(sk_load_byte_pos)
- clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen?
- jnl sk_load_byte_slow
- llgc %r14,0(%r3,%r12) # Get byte from skb
- B_EX OFF_OK,%r6 # Return OK
-
-sk_load_byte_slow:
- lgr %r2,%r7 # Arg1 = skb pointer
- # Arg2 = offset
- la %r4,STK_OFF_TMP(%r15) # Arg3 = pointer to temp buffer
- lghi %r5,1 # Arg4 = size (1 byte)
- brasl %r14,skb_copy_bits # Get data from skb
- llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer
- ltgr %r2,%r2 # Set cc to (%r2 != 0)
- BR_EX %r6 # Return cc
-
-#define sk_negative_common(NAME, SIZE, LOAD) \
-sk_load_##NAME##_slow_neg:; \
- cgfi %r3,SKF_MAX_NEG_OFF; \
- jl bpf_error; \
- lgr %r2,%r7; /* Arg1 = skb pointer */ \
- /* Arg2 = offset */ \
- lghi %r4,SIZE; /* Arg3 = size */ \
- brasl %r14,bpf_internal_load_pointer_neg_helper; \
- ltgr %r2,%r2; \
- jz bpf_error; \
- LOAD %r14,0(%r2); /* Get data from pointer */ \
- xr %r3,%r3; /* Set cc to zero */ \
- BR_EX %r6; /* Return cc */
-
-sk_negative_common(word, 4, llgf)
-sk_negative_common(half, 2, llgh)
-sk_negative_common(byte, 1, llgc)
-
-bpf_error:
-# force a return 0 from jit handler
- ltgr %r15,%r15 # Set condition code
- BR_EX %r6
diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index 5e1e5133132d..7822ea92e54a 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -16,9 +16,6 @@
#include <linux/filter.h>
#include <linux/types.h>
-extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[];
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-
#endif /* __ASSEMBLY__ */
/*
@@ -36,15 +33,6 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* | | |
* | BPF stack | |
* | | |
- * +---------------+ |
- * | 8 byte skbp | |
- * R15+176 -> +---------------+ |
- * | 8 byte hlen | |
- * R15+168 -> +---------------+ |
- * | 4 byte align | |
- * +---------------+ |
- * | 4 byte temp | |
- * | for bpf_jit.S | |
* R15+160 -> +---------------+ |
* | new backchain | |
* R15+152 -> +---------------+ |
@@ -57,17 +45,11 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
* The stack size used by the BPF program ("BPF stack" above) is passed
* via "aux->stack_depth".
*/
-#define STK_SPACE_ADD (8 + 8 + 4 + 4 + 160)
+#define STK_SPACE_ADD (160)
#define STK_160_UNUSED (160 - 12 * 8)
#define STK_OFF (STK_SPACE_ADD - STK_160_UNUSED)
-#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */
-#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */
-#define STK_OFF_SKBP 176 /* Offset of SKB pointer on stack */
#define STK_OFF_R6 (160 - 11 * 8) /* Offset of r6 on stack */
#define STK_OFF_TCCNT (160 - 12 * 8) /* Offset of tail_call_cnt on stack */
-/* Offset to skip condition code check */
-#define OFF_OK 4
-
#endif /* __ARCH_S390_NET_BPF_JIT_H */
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index dd2bcf0e7d00..d2db8acb1a55 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -51,23 +51,21 @@ struct bpf_jit {
#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
-#define SEEN_SKB 1 /* skb access */
-#define SEEN_MEM 2 /* use mem[] for temporary storage */
-#define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL 8 /* code uses literals */
-#define SEEN_FUNC 16 /* calls C functions */
-#define SEEN_TAIL_CALL 32 /* code uses tail calls */
-#define SEEN_REG_AX 64 /* code uses constant blinding */
-#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
+#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */
+#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */
+#define SEEN_LITERAL (1 << 2) /* code uses literals */
+#define SEEN_FUNC (1 << 3) /* calls C functions */
+#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */
+#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */
+#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
* s390 registers
*/
#define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */
#define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */
-#define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */
-#define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */
-#define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */
+#define REG_L (MAX_BPF_JIT_REG + 2) /* Literal pool register */
+#define REG_15 (MAX_BPF_JIT_REG + 3) /* Register 15 */
#define REG_0 REG_W0 /* Register 0 */
#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
@@ -92,10 +90,8 @@ static const int reg2hex[] = {
[BPF_REG_9] = 10,
/* BPF stack pointer */
[BPF_REG_FP] = 13,
- /* Register for blinding (shared with REG_SKB_DATA) */
+ /* Register for blinding */
[BPF_REG_AX] = 12,
- /* SKB data pointer */
- [REG_SKB_DATA] = 12,
/* Work registers for s390x backend */
[REG_W0] = 0,
[REG_W1] = 1,
@@ -402,27 +398,6 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
}
/*
- * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S"
- * we store the SKB header length on the stack and the SKB data
- * pointer in REG_SKB_DATA if BPF_REG_AX is not used.
- */
-static void emit_load_skb_data_hlen(struct bpf_jit *jit)
-{
- /* Header length: llgf %w1,<len>(%b1) */
- EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1,
- offsetof(struct sk_buff, len));
- /* s %w1,<data_len>(%b1) */
- EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1,
- offsetof(struct sk_buff, data_len));
- /* stg %w1,ST_OFF_HLEN(%r0,%r15) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN);
- if (!(jit->seen & SEEN_REG_AX))
- /* lg %skb_data,data_off(%b1) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
- BPF_REG_1, offsetof(struct sk_buff, data));
-}
-
-/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
@@ -462,12 +437,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
- if (jit->seen & SEEN_SKB) {
- emit_load_skb_data_hlen(jit);
- /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
- STK_OFF_SKBP);
- }
}
/*
@@ -537,12 +506,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
struct bpf_insn *insn = &fp->insnsi[i];
int jmp_off, last, insn_count = 1;
- unsigned int func_addr, mask;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
+ unsigned int mask;
if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
jit->seen |= SEEN_REG_AX;
@@ -1029,13 +998,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
}
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
- if ((jit->seen & SEEN_SKB) &&
- bpf_helper_changes_pkt_data((void *)func)) {
- /* lg %b1,ST_OFF_SKBP(%r15) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
- REG_15, STK_OFF_SKBP);
- emit_load_skb_data_hlen(jit);
- }
break;
}
case BPF_JMP | BPF_TAIL_CALL:
@@ -1235,73 +1197,6 @@ branch_oc:
jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
break;
- /*
- * BPF_LD
- */
- case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */
- case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */
- if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
- func_addr = __pa(sk_load_byte_pos);
- else
- func_addr = __pa(sk_load_byte);
- goto call_fn;
- case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */
- case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */
- if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
- func_addr = __pa(sk_load_half_pos);
- else
- func_addr = __pa(sk_load_half);
- goto call_fn;
- case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */
- case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */
- if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0))
- func_addr = __pa(sk_load_word_pos);
- else
- func_addr = __pa(sk_load_word);
- goto call_fn;
-call_fn:
- jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC;
- REG_SET_SEEN(REG_14); /* Return address of possible func call */
-
- /*
- * Implicit input:
- * BPF_REG_6 (R7) : skb pointer
- * REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX)
- *
- * Calculated input:
- * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb
- * BPF_REG_5 (R6) : return address
- *
- * Output:
- * BPF_REG_0 (R14): data read from skb
- *
- * Scratch registers (BPF_REG_1-5)
- */
-
- /* Call function: llilf %w1,func_addr */
- EMIT6_IMM(0xc00f0000, REG_W1, func_addr);
-
- /* Offset: lgfi %b2,imm */
- EMIT6_IMM(0xc0010000, BPF_REG_2, imm);
- if (BPF_MODE(insn->code) == BPF_IND)
- /* agfr %b2,%src (%src is s32 here) */
- EMIT4(0xb9180000, BPF_REG_2, src_reg);
-
- /* Reload REG_SKB_DATA if BPF_REG_AX is used */
- if (jit->seen & SEEN_REG_AX)
- /* lg %skb_data,data_off(%b6) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0,
- BPF_REG_6, offsetof(struct sk_buff, data));
- /* basr %b5,%w1 (%b5 is call saved) */
- EMIT2(0x0d00, BPF_REG_5, REG_W1);
-
- /*
- * Note: For fast access we jump directly after the
- * jnz instruction from bpf_jit.S
- */
- /* jnz <ret0> */
- EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg);
- break;
default: /* too complex, give up */
pr_err("Unknown opcode %02x\n", insn->code);
return -1;
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile
index 76fa8e95b721..d32aac3a25b8 100644
--- a/arch/sparc/net/Makefile
+++ b/arch/sparc/net/Makefile
@@ -1,4 +1,7 @@
#
# Arch-specific network modules
#
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o
+ifeq ($(BITS),32)
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o
+endif
diff --git a/arch/sparc/net/bpf_jit_64.h b/arch/sparc/net/bpf_jit_64.h
index 428f7fd19175..fbc836f1c51c 100644
--- a/arch/sparc/net/bpf_jit_64.h
+++ b/arch/sparc/net/bpf_jit_64.h
@@ -33,35 +33,6 @@
#define I5 0x1d
#define FP 0x1e
#define I7 0x1f
-
-#define r_SKB L0
-#define r_HEADLEN L4
-#define r_SKB_DATA L5
-#define r_TMP G1
-#define r_TMP2 G3
-
-/* assembly code in arch/sparc/net/bpf_jit_asm_64.S */
-extern u32 bpf_jit_load_word[];
-extern u32 bpf_jit_load_half[];
-extern u32 bpf_jit_load_byte[];
-extern u32 bpf_jit_load_byte_msh[];
-extern u32 bpf_jit_load_word_positive_offset[];
-extern u32 bpf_jit_load_half_positive_offset[];
-extern u32 bpf_jit_load_byte_positive_offset[];
-extern u32 bpf_jit_load_byte_msh_positive_offset[];
-extern u32 bpf_jit_load_word_negative_offset[];
-extern u32 bpf_jit_load_half_negative_offset[];
-extern u32 bpf_jit_load_byte_negative_offset[];
-extern u32 bpf_jit_load_byte_msh_negative_offset[];
-
-#else
-#define r_RESULT %o0
-#define r_SKB %o0
-#define r_OFF %o1
-#define r_HEADLEN %l4
-#define r_SKB_DATA %l5
-#define r_TMP %g1
-#define r_TMP2 %g3
#endif
#endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_asm_64.S b/arch/sparc/net/bpf_jit_asm_64.S
deleted file mode 100644
index 7177867052a1..000000000000
--- a/arch/sparc/net/bpf_jit_asm_64.S
+++ /dev/null
@@ -1,162 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <asm/ptrace.h>
-
-#include "bpf_jit_64.h"
-
-#define SAVE_SZ 176
-#define SCRATCH_OFF STACK_BIAS + 128
-#define BE_PTR(label) be,pn %xcc, label
-#define SIGN_EXTEND(reg) sra reg, 0, reg
-
-#define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */
-
- .text
- .globl bpf_jit_load_word
-bpf_jit_load_word:
- cmp r_OFF, 0
- bl bpf_slow_path_word_neg
- nop
- .globl bpf_jit_load_word_positive_offset
-bpf_jit_load_word_positive_offset:
- sub r_HEADLEN, r_OFF, r_TMP
- cmp r_TMP, 3
- ble bpf_slow_path_word
- add r_SKB_DATA, r_OFF, r_TMP
- andcc r_TMP, 3, %g0
- bne load_word_unaligned
- nop
- retl
- ld [r_TMP], r_RESULT
-load_word_unaligned:
- ldub [r_TMP + 0x0], r_OFF
- ldub [r_TMP + 0x1], r_TMP2
- sll r_OFF, 8, r_OFF
- or r_OFF, r_TMP2, r_OFF
- ldub [r_TMP + 0x2], r_TMP2
- sll r_OFF, 8, r_OFF
- or r_OFF, r_TMP2, r_OFF
- ldub [r_TMP + 0x3], r_TMP2
- sll r_OFF, 8, r_OFF
- retl
- or r_OFF, r_TMP2, r_RESULT
-
- .globl bpf_jit_load_half
-bpf_jit_load_half:
- cmp r_OFF, 0
- bl bpf_slow_path_half_neg
- nop
- .globl bpf_jit_load_half_positive_offset
-bpf_jit_load_half_positive_offset:
- sub r_HEADLEN, r_OFF, r_TMP
- cmp r_TMP, 1
- ble bpf_slow_path_half
- add r_SKB_DATA, r_OFF, r_TMP
- andcc r_TMP, 1, %g0
- bne load_half_unaligned
- nop
- retl
- lduh [r_TMP], r_RESULT
-load_half_unaligned:
- ldub [r_TMP + 0x0], r_OFF
- ldub [r_TMP + 0x1], r_TMP2
- sll r_OFF, 8, r_OFF
- retl
- or r_OFF, r_TMP2, r_RESULT
-
- .globl bpf_jit_load_byte
-bpf_jit_load_byte:
- cmp r_OFF, 0
- bl bpf_slow_path_byte_neg
- nop
- .globl bpf_jit_load_byte_positive_offset
-bpf_jit_load_byte_positive_offset:
- cmp r_OFF, r_HEADLEN
- bge bpf_slow_path_byte
- nop
- retl
- ldub [r_SKB_DATA + r_OFF], r_RESULT
-
-#define bpf_slow_path_common(LEN) \
- save %sp, -SAVE_SZ, %sp; \
- mov %i0, %o0; \
- mov %i1, %o1; \
- add %fp, SCRATCH_OFF, %o2; \
- call skb_copy_bits; \
- mov (LEN), %o3; \
- cmp %o0, 0; \
- restore;
-
-bpf_slow_path_word:
- bpf_slow_path_common(4)
- bl bpf_error
- ld [%sp + SCRATCH_OFF], r_RESULT
- retl
- nop
-bpf_slow_path_half:
- bpf_slow_path_common(2)
- bl bpf_error
- lduh [%sp + SCRATCH_OFF], r_RESULT
- retl
- nop
-bpf_slow_path_byte:
- bpf_slow_path_common(1)
- bl bpf_error
- ldub [%sp + SCRATCH_OFF], r_RESULT
- retl
- nop
-
-#define bpf_negative_common(LEN) \
- save %sp, -SAVE_SZ, %sp; \
- mov %i0, %o0; \
- mov %i1, %o1; \
- SIGN_EXTEND(%o1); \
- call bpf_internal_load_pointer_neg_helper; \
- mov (LEN), %o2; \
- mov %o0, r_TMP; \
- cmp %o0, 0; \
- BE_PTR(bpf_error); \
- restore;
-
-bpf_slow_path_word_neg:
- sethi %hi(SKF_MAX_NEG_OFF), r_TMP
- cmp r_OFF, r_TMP
- bl bpf_error
- nop
- .globl bpf_jit_load_word_negative_offset
-bpf_jit_load_word_negative_offset:
- bpf_negative_common(4)
- andcc r_TMP, 3, %g0
- bne load_word_unaligned
- nop
- retl
- ld [r_TMP], r_RESULT
-
-bpf_slow_path_half_neg:
- sethi %hi(SKF_MAX_NEG_OFF), r_TMP
- cmp r_OFF, r_TMP
- bl bpf_error
- nop
- .globl bpf_jit_load_half_negative_offset
-bpf_jit_load_half_negative_offset:
- bpf_negative_common(2)
- andcc r_TMP, 1, %g0
- bne load_half_unaligned
- nop
- retl
- lduh [r_TMP], r_RESULT
-
-bpf_slow_path_byte_neg:
- sethi %hi(SKF_MAX_NEG_OFF), r_TMP
- cmp r_OFF, r_TMP
- bl bpf_error
- nop
- .globl bpf_jit_load_byte_negative_offset
-bpf_jit_load_byte_negative_offset:
- bpf_negative_common(1)
- retl
- ldub [r_TMP], r_RESULT
-
-bpf_error:
- /* Make the JIT program itself return zero. */
- ret
- restore %g0, %g0, %o0
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 48a25869349b..222785af550b 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -48,10 +48,6 @@ static void bpf_flush_icache(void *start_, void *end_)
}
}
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG 2 /* ebx is used */
-#define SEEN_MEM 4 /* use mem[] for temporary storage */
-
#define S13(X) ((X) & 0x1fff)
#define S5(X) ((X) & 0x1f)
#define IMMED 0x00002000
@@ -198,7 +194,6 @@ struct jit_ctx {
bool tmp_1_used;
bool tmp_2_used;
bool tmp_3_used;
- bool saw_ld_abs_ind;
bool saw_frame_pointer;
bool saw_call;
bool saw_tail_call;
@@ -207,9 +202,7 @@ struct jit_ctx {
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
-#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 2)
-#define SKB_DATA_REG (MAX_BPF_JIT_REG + 3)
-#define TMP_REG_3 (MAX_BPF_JIT_REG + 4)
+#define TMP_REG_3 (MAX_BPF_JIT_REG + 2)
/* Map BPF registers to SPARC registers */
static const int bpf2sparc[] = {
@@ -238,9 +231,6 @@ static const int bpf2sparc[] = {
[TMP_REG_1] = G1,
[TMP_REG_2] = G2,
[TMP_REG_3] = G3,
-
- [SKB_HLEN_REG] = L4,
- [SKB_DATA_REG] = L5,
};
static void emit(const u32 insn, struct jit_ctx *ctx)
@@ -800,25 +790,6 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
return 0;
}
-static void load_skb_regs(struct jit_ctx *ctx, u8 r_skb)
-{
- const u8 r_headlen = bpf2sparc[SKB_HLEN_REG];
- const u8 r_data = bpf2sparc[SKB_DATA_REG];
- const u8 r_tmp = bpf2sparc[TMP_REG_1];
- unsigned int off;
-
- off = offsetof(struct sk_buff, len);
- emit(LD32I | RS1(r_skb) | S13(off) | RD(r_headlen), ctx);
-
- off = offsetof(struct sk_buff, data_len);
- emit(LD32I | RS1(r_skb) | S13(off) | RD(r_tmp), ctx);
-
- emit(SUB | RS1(r_headlen) | RS2(r_tmp) | RD(r_headlen), ctx);
-
- off = offsetof(struct sk_buff, data);
- emit(LDPTRI | RS1(r_skb) | S13(off) | RD(r_data), ctx);
-}
-
/* Just skip the save instruction and the ctx register move. */
#define BPF_TAILCALL_PROLOGUE_SKIP 16
#define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128)
@@ -857,9 +828,6 @@ static void build_prologue(struct jit_ctx *ctx)
emit_reg_move(I0, O0, ctx);
/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
-
- if (ctx->saw_ld_abs_ind)
- load_skb_regs(ctx, bpf2sparc[BPF_REG_1]);
}
static void build_epilogue(struct jit_ctx *ctx)
@@ -926,7 +894,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const int i = insn - ctx->prog->insnsi;
const s16 off = insn->off;
const s32 imm = insn->imm;
- u32 *func;
if (insn->src_reg == BPF_REG_FP)
ctx->saw_frame_pointer = true;
@@ -1225,16 +1192,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
u8 *func = ((u8 *)__bpf_call_base) + imm;
ctx->saw_call = true;
- if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
- emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
emit_call((u32 *)func, ctx);
emit_nop(ctx);
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
-
- if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
- load_skb_regs(ctx, L7);
break;
}
@@ -1412,43 +1374,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_nop(ctx);
break;
}
-#define CHOOSE_LOAD_FUNC(K, func) \
- ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
- case BPF_LD | BPF_ABS | BPF_W:
- func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_word);
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_H:
- func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_half);
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_B:
- func = CHOOSE_LOAD_FUNC(imm, bpf_jit_load_byte);
- goto common_load;
- /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + src + imm)) */
- case BPF_LD | BPF_IND | BPF_W:
- func = bpf_jit_load_word;
- goto common_load;
- case BPF_LD | BPF_IND | BPF_H:
- func = bpf_jit_load_half;
- goto common_load;
-
- case BPF_LD | BPF_IND | BPF_B:
- func = bpf_jit_load_byte;
- common_load:
- ctx->saw_ld_abs_ind = true;
-
- emit_reg_move(bpf2sparc[BPF_REG_6], O0, ctx);
- emit_loadimm(imm, O1, ctx);
-
- if (BPF_MODE(code) == BPF_IND)
- emit_alu(ADD, src, O1, ctx);
-
- emit_call(func, ctx);
- emit_alu_K(SRA, O1, 0, ctx);
-
- emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
- break;
default:
pr_err_once("unknown opcode %02x\n", code);
@@ -1583,12 +1508,11 @@ skip_init_ctx:
build_epilogue(&ctx);
if (bpf_jit_enable > 1)
- pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c%c]\n", pass,
+ pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
image_size - (ctx.idx * 4),
ctx.tmp_1_used ? '1' : ' ',
ctx.tmp_2_used ? '2' : ' ',
ctx.tmp_3_used ? '3' : ' ',
- ctx.saw_ld_abs_ind ? 'L' : ' ',
ctx.saw_frame_pointer ? 'F' : ' ',
ctx.saw_call ? 'C' : ' ',
ctx.saw_tail_call ? 'T' : ' ');
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ed796c92701f..cb6e3a219294 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -140,7 +140,7 @@ config X86
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
- select HAVE_EBPF_JIT if X86_64
+ select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EXIT_THREAD
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 8b38df98548e..f6f6c63da62f 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -308,16 +308,20 @@ do { \
* lfence
* jmp spec_trap
* do_rop:
- * mov %rax,(%rsp)
+ * mov %rax,(%rsp) for x86_64
+ * mov %edx,(%esp) for x86_32
* retq
*
* Without retpolines configured:
*
- * jmp *%rax
+ * jmp *%rax for x86_64
+ * jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
-# define RETPOLINE_RAX_BPF_JIT_SIZE 17
-# define RETPOLINE_RAX_BPF_JIT() \
+# ifdef CONFIG_X86_64
+# define RETPOLINE_RAX_BPF_JIT_SIZE 17
+# define RETPOLINE_RAX_BPF_JIT() \
+do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
EMIT2(0xF3, 0x90); /* pause */ \
@@ -325,11 +329,30 @@ do { \
EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
/* do_rop: */ \
EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
- EMIT1(0xC3); /* retq */
-#else
-# define RETPOLINE_RAX_BPF_JIT_SIZE 2
-# define RETPOLINE_RAX_BPF_JIT() \
- EMIT2(0xFF, 0xE0); /* jmp *%rax */
+ EMIT1(0xC3); /* retq */ \
+} while (0)
+# else /* !CONFIG_X86_64 */
+# define RETPOLINE_EDX_BPF_JIT() \
+do { \
+ EMIT1_off32(0xE8, 7); /* call do_rop */ \
+ /* spec_trap: */ \
+ EMIT2(0xF3, 0x90); /* pause */ \
+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+ /* do_rop: */ \
+ EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
+ EMIT1(0xC3); /* ret */ \
+} while (0)
+# endif
+#else /* !CONFIG_RETPOLINE */
+# ifdef CONFIG_X86_64
+# define RETPOLINE_RAX_BPF_JIT_SIZE 2
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT2(0xFF, 0xE0); /* jmp *%rax */
+# else /* !CONFIG_X86_64 */
+# define RETPOLINE_EDX_BPF_JIT() \
+ EMIT2(0xFF, 0xE2) /* jmp *%edx */
+# endif
#endif
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index fefb4b619598..59e123da580c 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -1,6 +1,9 @@
#
# Arch-specific network modules
#
-OBJECT_FILES_NON_STANDARD_bpf_jit.o += y
-obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o
+ifeq ($(CONFIG_X86_32),y)
+ obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
+else
+ obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+endif
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
deleted file mode 100644
index b33093f84528..000000000000
--- a/arch/x86/net/bpf_jit.S
+++ /dev/null
@@ -1,154 +0,0 @@
-/* bpf_jit.S : BPF JIT helper functions
- *
- * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-/*
- * Calling convention :
- * rbx : skb pointer (callee saved)
- * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r10 : copy of skb->data
- * r9d : hlen = skb->len - skb->data_len
- */
-#define SKBDATA %r10
-#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
-
-#define FUNC(name) \
- .globl name; \
- .type name, @function; \
- name:
-
-FUNC(sk_load_word)
- test %esi,%esi
- js bpf_slow_path_word_neg
-
-FUNC(sk_load_word_positive_offset)
- mov %r9d,%eax # hlen
- sub %esi,%eax # hlen - offset
- cmp $3,%eax
- jle bpf_slow_path_word
- mov (SKBDATA,%rsi),%eax
- bswap %eax /* ntohl() */
- ret
-
-FUNC(sk_load_half)
- test %esi,%esi
- js bpf_slow_path_half_neg
-
-FUNC(sk_load_half_positive_offset)
- mov %r9d,%eax
- sub %esi,%eax # hlen - offset
- cmp $1,%eax
- jle bpf_slow_path_half
- movzwl (SKBDATA,%rsi),%eax
- rol $8,%ax # ntohs()
- ret
-
-FUNC(sk_load_byte)
- test %esi,%esi
- js bpf_slow_path_byte_neg
-
-FUNC(sk_load_byte_positive_offset)
- cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
- jle bpf_slow_path_byte
- movzbl (SKBDATA,%rsi),%eax
- ret
-
-/* rsi contains offset and can be scratched */
-#define bpf_slow_path_common(LEN) \
- lea 32(%rbp), %rdx;\
- FRAME_BEGIN; \
- mov %rbx, %rdi; /* arg1 == skb */ \
- push %r9; \
- push SKBDATA; \
-/* rsi already has offset */ \
- mov $LEN,%ecx; /* len */ \
- call skb_copy_bits; \
- test %eax,%eax; \
- pop SKBDATA; \
- pop %r9; \
- FRAME_END
-
-
-bpf_slow_path_word:
- bpf_slow_path_common(4)
- js bpf_error
- mov 32(%rbp),%eax
- bswap %eax
- ret
-
-bpf_slow_path_half:
- bpf_slow_path_common(2)
- js bpf_error
- mov 32(%rbp),%ax
- rol $8,%ax
- movzwl %ax,%eax
- ret
-
-bpf_slow_path_byte:
- bpf_slow_path_common(1)
- js bpf_error
- movzbl 32(%rbp),%eax
- ret
-
-#define sk_negative_common(SIZE) \
- FRAME_BEGIN; \
- mov %rbx, %rdi; /* arg1 == skb */ \
- push %r9; \
- push SKBDATA; \
-/* rsi already has offset */ \
- mov $SIZE,%edx; /* size */ \
- call bpf_internal_load_pointer_neg_helper; \
- test %rax,%rax; \
- pop SKBDATA; \
- pop %r9; \
- FRAME_END; \
- jz bpf_error
-
-bpf_slow_path_word_neg:
- cmp SKF_MAX_NEG_OFF, %esi /* test range */
- jl bpf_error /* offset lower -> error */
-
-FUNC(sk_load_word_negative_offset)
- sk_negative_common(4)
- mov (%rax), %eax
- bswap %eax
- ret
-
-bpf_slow_path_half_neg:
- cmp SKF_MAX_NEG_OFF, %esi
- jl bpf_error
-
-FUNC(sk_load_half_negative_offset)
- sk_negative_common(2)
- mov (%rax),%ax
- rol $8,%ax
- movzwl %ax,%eax
- ret
-
-bpf_slow_path_byte_neg:
- cmp SKF_MAX_NEG_OFF, %esi
- jl bpf_error
-
-FUNC(sk_load_byte_negative_offset)
- sk_negative_common(1)
- movzbl (%rax), %eax
- ret
-
-bpf_error:
-# force a return 0 from jit handler
- xor %eax,%eax
- mov (%rbp),%rbx
- mov 8(%rbp),%r13
- mov 16(%rbp),%r14
- mov 24(%rbp),%r15
- add $40, %rbp
- leaveq
- ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index d765acedc05c..8fca446aaef6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -17,15 +17,6 @@
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
-/*
- * Assembly code in arch/x86/net/bpf_jit.S
- */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
-extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[];
-extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[];
-
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
if (len == 1)
@@ -107,9 +98,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define X86_JLE 0x7E
#define X86_JG 0x7F
-#define CHOOSE_LOAD_FUNC(K, func) \
- ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
-
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
@@ -120,8 +108,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * R9 caches skb->len - skb->data_len
- * R10 caches skb->data, and used for blinding (if enabled)
+ * Also x86-64 register R9 is unused. x86-64 register R10 is
+ * used for blinding (if enabled).
*/
static const int reg2hex[] = {
[BPF_REG_0] = 0, /* RAX */
@@ -196,19 +184,15 @@ static void jit_fill_hole(void *area, unsigned int size)
struct jit_context {
int cleanup_addr; /* Epilogue code offset */
- bool seen_ld_abs;
- bool seen_ax_reg;
};
/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-#define AUX_STACK_SPACE \
- (32 /* Space for RBX, R13, R14, R15 */ + \
- 8 /* Space for skb_copy_bits() buffer */)
+#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */
-#define PROLOGUE_SIZE 37
+#define PROLOGUE_SIZE 37
/*
* Emit x86-64 prologue code for BPF program and check its size.
@@ -232,20 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
/* sub rbp, AUX_STACK_SPACE */
EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
- /* All classic BPF filters use R6(rbx) save it */
-
/* mov qword ptr [rbp+0],rbx */
EMIT4(0x48, 0x89, 0x5D, 0);
-
- /*
- * bpf_convert_filter() maps classic BPF register X to R7 and uses R8
- * as temporary, so all tcpdump filters need to spill/fill R7(R13) and
- * R8(R14). R9(R15) spill could be made conditional, but there is only
- * one 'bpf_error' return path out of helper functions inside bpf_jit.S
- * The overhead of extra spill is negligible for any filter other
- * than synthetic ones. Therefore not worth adding complexity.
- */
-
/* mov qword ptr [rbp+8],r13 */
EMIT4(0x4C, 0x89, 0x6D, 8);
/* mov qword ptr [rbp+16],r14 */
@@ -353,27 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog)
*pprog = prog;
}
-
-static void emit_load_skb_data_hlen(u8 **pprog)
-{
- u8 *prog = *pprog;
- int cnt = 0;
-
- /*
- * r9d = skb->len - skb->data_len (headlen)
- * r10 = skb->data
- */
- /* mov %r9d, off32(%rdi) */
- EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
-
- /* sub %r9d, off32(%rdi) */
- EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
-
- /* mov %r10, off32(%rdi) */
- EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
- *pprog = prog;
-}
-
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u32 dst_reg, const u32 imm32)
{
@@ -462,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
{
struct bpf_insn *insn = bpf_prog->insnsi;
int insn_cnt = bpf_prog->len;
- bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
- bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0;
@@ -473,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
emit_prologue(&prog, bpf_prog->aux->stack_depth,
bpf_prog_was_classic(bpf_prog));
- if (seen_ld_abs)
- emit_load_skb_data_hlen(&prog);
-
for (i = 0; i < insn_cnt; i++, insn++) {
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
@@ -483,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
- bool reload_skb_data;
int ilen;
u8 *func;
- if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
- ctx->seen_ax_reg = seen_ax_reg = true;
-
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
@@ -916,36 +858,12 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
jmp_offset = func - (image + addrs[i]);
- if (seen_ld_abs) {
- reload_skb_data = bpf_helper_changes_pkt_data(func);
- if (reload_skb_data) {
- EMIT1(0x57); /* push %rdi */
- jmp_offset += 22; /* pop, mov, sub, mov */
- } else {
- EMIT2(0x41, 0x52); /* push %r10 */
- EMIT2(0x41, 0x51); /* push %r9 */
- /*
- * We need to adjust jmp offset, since
- * pop %r9, pop %r10 take 4 bytes after call insn
- */
- jmp_offset += 4;
- }
- }
if (!imm32 || !is_simm32(jmp_offset)) {
pr_err("unsupported BPF func %d addr %p image %p\n",
imm32, func, image);
return -EINVAL;
}
EMIT1_off32(0xE8, jmp_offset);
- if (seen_ld_abs) {
- if (reload_skb_data) {
- EMIT1(0x5F); /* pop %rdi */
- emit_load_skb_data_hlen(&prog);
- } else {
- EMIT2(0x41, 0x59); /* pop %r9 */
- EMIT2(0x41, 0x5A); /* pop %r10 */
- }
- }
break;
case BPF_JMP | BPF_TAIL_CALL:
@@ -1080,60 +998,6 @@ emit_jmp:
}
break;
- case BPF_LD | BPF_IND | BPF_W:
- func = sk_load_word;
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_W:
- func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
-common_load:
- ctx->seen_ld_abs = seen_ld_abs = true;
- jmp_offset = func - (image + addrs[i]);
- if (!func || !is_simm32(jmp_offset)) {
- pr_err("unsupported BPF func %d addr %p image %p\n",
- imm32, func, image);
- return -EINVAL;
- }
- if (BPF_MODE(insn->code) == BPF_ABS) {
- /* mov %esi, imm32 */
- EMIT1_off32(0xBE, imm32);
- } else {
- /* mov %rsi, src_reg */
- EMIT_mov(BPF_REG_2, src_reg);
- if (imm32) {
- if (is_imm8(imm32))
- /* add %esi, imm8 */
- EMIT3(0x83, 0xC6, imm32);
- else
- /* add %esi, imm32 */
- EMIT2_off32(0x81, 0xC6, imm32);
- }
- }
- /*
- * skb pointer is in R6 (%rbx), it will be copied into
- * %rdi if skb_copy_bits() call is necessary.
- * sk_load_* helpers also use %r10 and %r9d.
- * See bpf_jit.S
- */
- if (seen_ax_reg)
- /* r10 = skb->data, mov %r10, off32(%rbx) */
- EMIT3_off32(0x4c, 0x8b, 0x93,
- offsetof(struct sk_buff, data));
- EMIT1_off32(0xE8, jmp_offset); /* call */
- break;
-
- case BPF_LD | BPF_IND | BPF_H:
- func = sk_load_half;
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_H:
- func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
- goto common_load;
- case BPF_LD | BPF_IND | BPF_B:
- func = sk_load_byte;
- goto common_load;
- case BPF_LD | BPF_ABS | BPF_B:
- func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
- goto common_load;
-
case BPF_JMP | BPF_EXIT:
if (seen_exit) {
jmp_offset = ctx->cleanup_addr - addrs[i];
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
new file mode 100644
index 000000000000..0cc04e30adc1
--- /dev/null
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -0,0 +1,2419 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
+ *
+ * Author: Wang YanQing (udknight@gmail.com)
+ * The code based on code and ideas from:
+ * Eric Dumazet (eric.dumazet@gmail.com)
+ * and from:
+ * Shubham Bansal <illusionist.neo@gmail.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
+#include <linux/bpf.h>
+
+/*
+ * eBPF prog stack layout:
+ *
+ * high
+ * original ESP => +-----+
+ * | | callee saved registers
+ * +-----+
+ * | ... | eBPF JIT scratch space
+ * BPF_FP,IA32_EBP => +-----+
+ * | ... | eBPF prog stack
+ * +-----+
+ * |RSVD | JIT scratchpad
+ * current ESP => +-----+
+ * | |
+ * | ... | Function call stack
+ * | |
+ * +-----+
+ * low
+ *
+ * The callee saved registers:
+ *
+ * high
+ * original ESP => +------------------+ \
+ * | ebp | |
+ * current EBP => +------------------+ } callee saved registers
+ * | ebx,esi,edi | |
+ * +------------------+ /
+ * low
+ */
+
+static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
+{
+ if (len == 1)
+ *ptr = bytes;
+ else if (len == 2)
+ *(u16 *)ptr = bytes;
+ else {
+ *(u32 *)ptr = bytes;
+ barrier();
+ }
+ return ptr + len;
+}
+
+#define EMIT(bytes, len) \
+ do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
+
+#define EMIT1(b1) EMIT(b1, 1)
+#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
+#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
+#define EMIT4(b1, b2, b3, b4) \
+ EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+
+#define EMIT1_off32(b1, off) \
+ do { EMIT1(b1); EMIT(off, 4); } while (0)
+#define EMIT2_off32(b1, b2, off) \
+ do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
+#define EMIT3_off32(b1, b2, b3, off) \
+ do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+#define EMIT4_off32(b1, b2, b3, b4, off) \
+ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
+
+#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
+
+static bool is_imm8(int value)
+{
+ return value <= 127 && value >= -128;
+}
+
+static bool is_simm32(s64 value)
+{
+ return value == (s64) (s32) value;
+}
+
+#define STACK_OFFSET(k) (k)
+#define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */
+
+#define IA32_EAX (0x0)
+#define IA32_EBX (0x3)
+#define IA32_ECX (0x1)
+#define IA32_EDX (0x2)
+#define IA32_ESI (0x6)
+#define IA32_EDI (0x7)
+#define IA32_EBP (0x5)
+#define IA32_ESP (0x4)
+
+/*
+ * List of x86 cond jumps opcodes (. + s8)
+ * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
+ */
+#define IA32_JB 0x72
+#define IA32_JAE 0x73
+#define IA32_JE 0x74
+#define IA32_JNE 0x75
+#define IA32_JBE 0x76
+#define IA32_JA 0x77
+#define IA32_JL 0x7C
+#define IA32_JGE 0x7D
+#define IA32_JLE 0x7E
+#define IA32_JG 0x7F
+
+/*
+ * Map eBPF registers to IA32 32bit registers or stack scratch space.
+ *
+ * 1. All the registers, R0-R10, are mapped to scratch space on stack.
+ * 2. We need two 64 bit temp registers to do complex operations on eBPF
+ * registers.
+ * 3. For performance reason, the BPF_REG_AX for blinding constant, is
+ * mapped to real hardware register pair, IA32_ESI and IA32_EDI.
+ *
+ * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
+ * registers, we have to map each eBPF registers with two IA32 32 bit regs
+ * or scratch memory space and we have to build eBPF 64 bit register from those.
+ *
+ * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
+ */
+static const u8 bpf2ia32[][2] = {
+ /* Return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
+
+ /* The arguments from eBPF program to in-kernel function */
+ /* Stored on stack scratch space */
+ [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
+ [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
+ [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+ [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
+ [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
+
+ /* Callee saved registers that in-kernel function will preserve */
+ /* Stored on stack scratch space */
+ [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+ [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+ [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+ [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+
+ /* Read only Frame Pointer to access Stack */
+ [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
+
+ /* Temporary register for blinding constants. */
+ [BPF_REG_AX] = {IA32_ESI, IA32_EDI},
+
+ /* Tail call count. Stored on stack scratch space. */
+ [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
+};
+
+#define dst_lo dst[0]
+#define dst_hi dst[1]
+#define src_lo src[0]
+#define src_hi src[1]
+
+#define STACK_ALIGNMENT 8
+/*
+ * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+ * BPF_REG_FP, BPF_REG_AX and Tail call counts.
+ */
+#define SCRATCH_SIZE 96
+
+/* Total stack size used in JITed code */
+#define _STACK_SIZE (stack_depth + SCRATCH_SIZE)
+
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
+
+/* Get the offset of eBPF REGISTERs stored on scratch space. */
+#define STACK_VAR(off) (off)
+
+/* Encode 'dst_reg' register into IA32 opcode 'byte' */
+static u8 add_1reg(u8 byte, u32 dst_reg)
+{
+ return byte + dst_reg;
+}
+
+/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
+static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
+{
+ return byte + dst_reg + (src_reg << 3);
+}
+
+static void jit_fill_hole(void *area, unsigned int size)
+{
+ /* Fill whole space with int3 instructions */
+ memset(area, 0xcc, size);
+}
+
+static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
+ u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (dstk) {
+ if (val == 0) {
+ /* xor eax,eax */
+ EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ } else {
+ EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
+ STACK_VAR(dst), val);
+ }
+ } else {
+ if (val == 0)
+ EMIT2(0x33, add_2reg(0xC0, dst, dst));
+ else
+ EMIT2_off32(0xC7, add_1reg(0xC0, dst),
+ val);
+ }
+ *pprog = prog;
+}
+
+/* dst = imm (4 bytes)*/
+static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 sreg = sstk ? IA32_EAX : src;
+
+ if (sstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
+ if (dstk)
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
+ else
+ /* mov dst,sreg */
+ EMIT2(0x89, add_2reg(0xC0, dst, sreg));
+
+ *pprog = prog;
+}
+
+/* dst = src */
+static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, u8 **pprog)
+{
+ emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
+ if (is64)
+ /* complete 8 byte move */
+ emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
+ else
+ /* zero out high 4 bytes */
+ emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
+}
+
+/* Sign extended move */
+static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
+ const u32 val, bool dstk, u8 **pprog)
+{
+ u32 hi = 0;
+
+ if (is64 && (val & (1<<31)))
+ hi = (u32)~0;
+ emit_ia32_mov_i(dst_lo, val, dstk, pprog);
+ emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst * src
+ */
+static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 sreg = sstk ? IA32_ECX : src;
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+ else
+ /* mov eax,dst */
+ EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
+
+
+ EMIT2(0xF7, add_1reg(0xE0, sreg));
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ else
+ /* mov dst,eax */
+ EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
+
+ *pprog = prog;
+}
+
+static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk && val != 64) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ switch (val) {
+ case 16:
+ /*
+ * Emit 'movzwl eax,ax' to zero extend 16-bit
+ * into 64 bit
+ */
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 32:
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 64:
+ /* nop */
+ break;
+ }
+
+ if (dstk && val != 64) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ switch (val) {
+ case 16:
+ /* Emit 'ror %ax, 8' to swap lower 2 bytes */
+ EMIT1(0x66);
+ EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
+
+ EMIT2(0x0F, 0xB7);
+ EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
+
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 32:
+ /* Emit 'bswap eax' to swap lower 4 bytes */
+ EMIT1(0x0F);
+ EMIT1(add_1reg(0xC8, dreg_lo));
+
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ break;
+ case 64:
+ /* Emit 'bswap eax' to swap lower 4 bytes */
+ EMIT1(0x0F);
+ EMIT1(add_1reg(0xC8, dreg_lo));
+
+ /* Emit 'bswap edx' to swap lower 4 bytes */
+ EMIT1(0x0F);
+ EMIT1(add_1reg(0xC8, dreg_hi));
+
+ /* mov ecx,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
+ /* mov dreg_hi,dreg_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+ /* mov dreg_lo,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
+
+ break;
+ }
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (div|mod) src
+ */
+static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src));
+ else if (src != IA32_ECX)
+ /* mov ecx,src */
+ EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ else
+ /* mov eax,dst */
+ EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
+
+ /* xor edx,edx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
+ /* div ecx */
+ EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
+
+ if (op == BPF_MOD) {
+ if (dstk)
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst));
+ else
+ EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
+ } else {
+ if (dstk)
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst));
+ else
+ EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
+ }
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (shift) src
+ */
+static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg = dstk ? IA32_EAX : dst;
+ u8 b2;
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
+ else if (src != IA32_ECX)
+ /* mov ecx,src */
+ EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
+
+ switch (op) {
+ case BPF_LSH:
+ b2 = 0xE0; break;
+ case BPF_RSH:
+ b2 = 0xE8; break;
+ case BPF_ARSH:
+ b2 = 0xF8; break;
+ default:
+ return;
+ }
+ EMIT2(0xD3, add_1reg(b2, dreg));
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],dreg */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (op) src
+ */
+static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
+ const u8 dst, const u8 src, bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 sreg = sstk ? IA32_EAX : src;
+ u8 dreg = dstk ? IA32_EDX : dst;
+
+ if (sstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
+
+ switch (BPF_OP(op)) {
+ /* dst = dst + src */
+ case BPF_ADD:
+ if (hi && is64)
+ EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
+ else
+ EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst - src */
+ case BPF_SUB:
+ if (hi && is64)
+ EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
+ else
+ EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst | src */
+ case BPF_OR:
+ EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst & src */
+ case BPF_AND:
+ EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst ^ src */
+ case BPF_XOR:
+ EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
+ break;
+ }
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],dreg */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
+ STACK_VAR(dst));
+ *pprog = prog;
+}
+
+/* ALU operation (64 bit) */
+static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
+ const u8 dst[], const u8 src[],
+ bool dstk, bool sstk,
+ u8 **pprog)
+{
+ u8 *prog = *pprog;
+
+ emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
+ if (is64)
+ emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
+ &prog);
+ else
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ *pprog = prog;
+}
+
+/*
+ * ALU operation (32 bit)
+ * dst = dst (op) val
+ */
+static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
+ const u8 dst, const s32 val, bool dstk,
+ u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg = dstk ? IA32_EAX : dst;
+ u8 sreg = IA32_EDX;
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
+
+ if (!is_imm8(val))
+ /* mov edx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
+
+ switch (op) {
+ /* dst = dst + val */
+ case BPF_ADD:
+ if (hi && is64) {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xD0, dreg), val);
+ else
+ EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
+ } else {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xC0, dreg), val);
+ else
+ EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
+ }
+ break;
+ /* dst = dst - val */
+ case BPF_SUB:
+ if (hi && is64) {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xD8, dreg), val);
+ else
+ EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
+ } else {
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xE8, dreg), val);
+ else
+ EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
+ }
+ break;
+ /* dst = dst | val */
+ case BPF_OR:
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xC8, dreg), val);
+ else
+ EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst & val */
+ case BPF_AND:
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xE0, dreg), val);
+ else
+ EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
+ break;
+ /* dst = dst ^ val */
+ case BPF_XOR:
+ if (is_imm8(val))
+ EMIT3(0x83, add_1reg(0xF0, dreg), val);
+ else
+ EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
+ break;
+ case BPF_NEG:
+ EMIT2(0xF7, add_1reg(0xD8, dreg));
+ break;
+ }
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],dreg */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
+ STACK_VAR(dst));
+ *pprog = prog;
+}
+
+/* ALU operation (64 bit) */
+static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
+ const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ u32 hi = 0;
+
+ if (is64 && (val & (1<<31)))
+ hi = (u32)~0;
+
+ emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
+ if (is64)
+ emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
+ else
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+
+ *pprog = prog;
+}
+
+/* dst = ~dst (64 bit) */
+static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ /* xor ecx,ecx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* sub dreg_lo,ecx */
+ EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX));
+ /* mov dreg_lo,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
+
+ /* xor ecx,ecx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* sbb dreg_hi,ecx */
+ EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX));
+ /* mov dreg_hi,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX));
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/* dst = dst << src */
+static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ static int jmp_label1 = -1;
+ static int jmp_label2 = -1;
+ static int jmp_label3 = -1;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ else
+ /* mov ecx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* Jumps when >= 32 */
+ if (is_imm8(jmp_label(jmp_label1, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+ /* < 32 */
+ /* shl dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
+ /* mov ebx,dreg_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shl dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
+
+ /* IA32_ECX = -IA32_ECX + 32 */
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shr ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
+ /* or dreg_hi,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 32 */
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+
+ /* cmp ecx,64 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+ /* Jumps when >= 64 */
+ if (is_imm8(jmp_label(jmp_label2, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+ /* >= 32 && < 64 */
+ /* sub ecx,32 */
+ EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+ /* shl dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
+ /* mov dreg_hi,dreg_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 64 */
+ if (jmp_label2 == -1)
+ jmp_label2 = cnt;
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+ if (jmp_label3 == -1)
+ jmp_label3 = cnt;
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ /* out: */
+ *pprog = prog;
+}
+
+/* dst = dst >> src (signed)*/
+static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
+ bool dstk, bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ static int jmp_label1 = -1;
+ static int jmp_label2 = -1;
+ static int jmp_label3 = -1;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ else
+ /* mov ecx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* Jumps when >= 32 */
+ if (is_imm8(jmp_label(jmp_label1, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+ /* < 32 */
+ /* lshr dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* ashr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
+
+ /* IA32_ECX = -IA32_ECX + 32 */
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 32 */
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+
+ /* cmp ecx,64 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+ /* Jumps when >= 64 */
+ if (is_imm8(jmp_label(jmp_label2, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+ /* >= 32 && < 64 */
+ /* sub ecx,32 */
+ EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+ /* ashr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 64 */
+ if (jmp_label2 == -1)
+ jmp_label2 = cnt;
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+ if (jmp_label3 == -1)
+ jmp_label3 = cnt;
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ /* out: */
+ *pprog = prog;
+}
+
+/* dst = dst >> src */
+static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ static int jmp_label1 = -1;
+ static int jmp_label2 = -1;
+ static int jmp_label3 = -1;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk)
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ else
+ /* mov ecx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
+
+ /* cmp ecx,32 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
+ /* Jumps when >= 32 */
+ if (is_imm8(jmp_label(jmp_label1, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
+
+ /* < 32 */
+ /* lshr dreg_lo,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
+
+ /* IA32_ECX = -IA32_ECX + 32 */
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 32 */
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+ /* cmp ecx,64 */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
+ /* Jumps when >= 64 */
+ if (is_imm8(jmp_label(jmp_label2, 2)))
+ EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
+ else
+ EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
+
+ /* >= 32 && < 64 */
+ /* sub ecx,32 */
+ EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
+ /* shr dreg_hi,cl */
+ EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+ /* goto out; */
+ if (is_imm8(jmp_label(jmp_label3, 2)))
+ EMIT2(0xEB, jmp_label(jmp_label3, 2));
+ else
+ EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
+
+ /* >= 64 */
+ if (jmp_label2 == -1)
+ jmp_label2 = cnt;
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+
+ if (jmp_label3 == -1)
+ jmp_label3 = cnt;
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ /* out: */
+ *pprog = prog;
+}
+
+/* dst = dst << val */
+static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ /* Do LSH operation */
+ if (val < 32) {
+ /* shl dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
+ /* mov ebx,dreg_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ /* shl dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
+
+ /* IA32_ECX = 32 - val */
+ /* mov ecx,val */
+ EMIT2(0xB1, val);
+ /* movzx ecx,ecx */
+ EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shr ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
+ /* or dreg_hi,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ } else if (val >= 32 && val < 64) {
+ u32 value = val - 32;
+
+ /* shl dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
+ /* mov dreg_hi,dreg_lo */
+ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ } else {
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ }
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/* dst = dst >> val */
+static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ /* Do RSH operation */
+ if (val < 32) {
+ /* shr dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* shr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
+
+ /* IA32_ECX = 32 - val */
+ /* mov ecx,val */
+ EMIT2(0xB1, val);
+ /* movzx ecx,ecx */
+ EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ } else if (val >= 32 && val < 64) {
+ u32 value = val - 32;
+
+ /* shr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ } else {
+ /* xor dreg_lo,dreg_lo */
+ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
+ /* xor dreg_hi,dreg_hi */
+ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
+ }
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+/* dst = dst >> val (signed) */
+static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ /* Do RSH operation */
+ if (val < 32) {
+ /* shr dreg_lo,imm8 */
+ EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
+ /* mov ebx,dreg_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
+
+ /* IA32_ECX = 32 - val */
+ /* mov ecx,val */
+ EMIT2(0xB1, val);
+ /* movzx ecx,ecx */
+ EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
+ /* neg ecx */
+ EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
+ /* add ecx,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
+
+ /* shl ebx,cl */
+ EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
+ /* or dreg_lo,ebx */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
+ } else if (val >= 32 && val < 64) {
+ u32 value = val - 32;
+
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+ } else {
+ /* ashr dreg_hi,imm8 */
+ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
+ /* mov dreg_lo,dreg_hi */
+ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
+ }
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],dreg_lo */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],dreg_hi */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
+ STACK_VAR(dst_hi));
+ }
+ *pprog = prog;
+}
+
+static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_hi));
+ else
+ /* mov eax,dst_hi */
+ EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
+
+ if (sstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
+ else
+ /* mul src_lo */
+ EMIT2(0xF7, add_1reg(0xE0, src_lo));
+
+ /* mov ecx,eax */
+ EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ if (sstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
+ else
+ /* mul src_hi */
+ EMIT2(0xF7, add_1reg(0xE0, src_hi));
+
+ /* add eax,eax */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ if (sstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
+ else
+ /* mul src_lo */
+ EMIT2(0xF7, add_1reg(0xE0, src_lo));
+
+ /* add ecx,edx */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],ecx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(dst_hi));
+ } else {
+ /* mov dst_lo,eax */
+ EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
+ /* mov dst_hi,ecx */
+ EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
+ }
+
+ *pprog = prog;
+}
+
+static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
+ bool dstk, u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ u32 hi;
+
+ hi = val & (1<<31) ? (u32)~0 : 0;
+ /* movl eax,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
+ if (dstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
+ else
+ /* mul dst_hi */
+ EMIT2(0xF7, add_1reg(0xE0, dst_hi));
+
+ /* mov ecx,eax */
+ EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ /* movl eax,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
+ if (dstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
+ else
+ /* mul dst_lo */
+ EMIT2(0xF7, add_1reg(0xE0, dst_lo));
+ /* add ecx,eax */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
+
+ /* movl eax,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
+ if (dstk)
+ /* mul dword ptr [ebp+off] */
+ EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
+ else
+ /* mul dst_lo */
+ EMIT2(0xF7, add_1reg(0xE0, dst_lo));
+
+ /* add ecx,edx */
+ EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
+
+ if (dstk) {
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ /* mov dword ptr [ebp+off],ecx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(dst_hi));
+ } else {
+ /* mov dword ptr [ebp+off],eax */
+ EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
+ /* mov dword ptr [ebp+off],ecx */
+ EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
+ }
+
+ *pprog = prog;
+}
+
+static int bpf_size_to_x86_bytes(int bpf_size)
+{
+ if (bpf_size == BPF_W)
+ return 4;
+ else if (bpf_size == BPF_H)
+ return 2;
+ else if (bpf_size == BPF_B)
+ return 1;
+ else if (bpf_size == BPF_DW)
+ return 4; /* imm32 */
+ else
+ return 0;
+}
+
+struct jit_context {
+ int cleanup_addr; /* Epilogue code offset */
+};
+
+/* Maximum number of bytes emitted while JITing one eBPF insn */
+#define BPF_MAX_INSN_SIZE 128
+#define BPF_INSN_SAFETY 64
+
+#define PROLOGUE_SIZE 35
+
+/*
+ * Emit prologue code for BPF program and check it's size.
+ * bpf_tail_call helper will skip it while jumping into another program.
+ */
+static void emit_prologue(u8 **pprog, u32 stack_depth)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ const u8 *r1 = bpf2ia32[BPF_REG_1];
+ const u8 fplo = bpf2ia32[BPF_REG_FP][0];
+ const u8 fphi = bpf2ia32[BPF_REG_FP][1];
+ const u8 *tcc = bpf2ia32[TCALL_CNT];
+
+ /* push ebp */
+ EMIT1(0x55);
+ /* mov ebp,esp */
+ EMIT2(0x89, 0xE5);
+ /* push edi */
+ EMIT1(0x57);
+ /* push esi */
+ EMIT1(0x56);
+ /* push ebx */
+ EMIT1(0x53);
+
+ /* sub esp,STACK_SIZE */
+ EMIT2_off32(0x81, 0xEC, STACK_SIZE);
+ /* sub ebp,SCRATCH_SIZE+4+12*/
+ EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
+ /* xor ebx,ebx */
+ EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
+
+ /* Set up BPF prog stack base register */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
+
+ /* Move BPF_CTX (EAX) to BPF_REG_R1 */
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
+
+ /* Initialize Tail Count */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ *pprog = prog;
+}
+
+/* Emit epilogue code for BPF program */
+static void emit_epilogue(u8 **pprog, u32 stack_depth)
+{
+ u8 *prog = *pprog;
+ const u8 *r0 = bpf2ia32[BPF_REG_0];
+ int cnt = 0;
+
+ /* mov eax,dword ptr [ebp+off]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
+ /* mov edx,dword ptr [ebp+off]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
+
+ /* add ebp,SCRATCH_SIZE+4+12*/
+ EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
+
+ /* mov ebx,dword ptr [ebp-12]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
+ /* mov esi,dword ptr [ebp-8]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
+ /* mov edi,dword ptr [ebp-4]*/
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
+
+ EMIT1(0xC9); /* leave */
+ EMIT1(0xC3); /* ret */
+ *pprog = prog;
+}
+
+/*
+ * Generate the following code:
+ * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
+ * if (index >= array->map.max_entries)
+ * goto out;
+ * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ * prog = array->ptrs[index];
+ * if (prog == NULL)
+ * goto out;
+ * goto *(prog->bpf_func + prologue_size);
+ * out:
+ */
+static void emit_bpf_tail_call(u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ const u8 *r1 = bpf2ia32[BPF_REG_1];
+ const u8 *r2 = bpf2ia32[BPF_REG_2];
+ const u8 *r3 = bpf2ia32[BPF_REG_3];
+ const u8 *tcc = bpf2ia32[TCALL_CNT];
+ u32 lo, hi;
+ static int jmp_label1 = -1;
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ */
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
+ /* mov edx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
+
+ /* cmp dword ptr [eax+off],edx */
+ EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
+ offsetof(struct bpf_array, map.max_entries));
+ /* jbe out */
+ EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ lo = (u32)MAX_TAIL_CALL_CNT;
+ hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+ /* cmp edx,hi */
+ EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
+ EMIT2(IA32_JNE, 3);
+ /* cmp ecx,lo */
+ EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
+
+ /* ja out */
+ EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
+
+ /* add eax,0x1 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
+ /* adc ebx,0x0 */
+ EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
+
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
+ /* mov dword ptr [ebp+off],edx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
+
+ /* prog = array->ptrs[index]; */
+ /* mov edx, [eax + edx * 4 + offsetof(...)] */
+ EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
+
+ /*
+ * if (prog == NULL)
+ * goto out;
+ */
+ /* test edx,edx */
+ EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
+ /* je out */
+ EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ /* mov edx, dword ptr [edx + 32] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
+ offsetof(struct bpf_prog, bpf_func));
+ /* add edx,prologue_size */
+ EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
+
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
+
+ /*
+ * Now we're ready to jump into next BPF program:
+ * eax == ctx (1st arg)
+ * edx == prog->bpf_func + prologue_size
+ */
+ RETPOLINE_EDX_BPF_JIT();
+
+ if (jmp_label1 == -1)
+ jmp_label1 = cnt;
+
+ /* out: */
+ *pprog = prog;
+}
+
+/* Push the scratch stack register on top of the stack. */
+static inline void emit_push_r64(const u8 src[], u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
+ /* push ecx */
+ EMIT1(0x51);
+
+ /* mov ecx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
+ /* push ecx */
+ EMIT1(0x51);
+
+ *pprog = prog;
+}
+
+static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ int oldproglen, struct jit_context *ctx)
+{
+ struct bpf_insn *insn = bpf_prog->insnsi;
+ int insn_cnt = bpf_prog->len;
+ bool seen_exit = false;
+ u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+ int i, cnt = 0;
+ int proglen = 0;
+ u8 *prog = temp;
+
+ emit_prologue(&prog, bpf_prog->aux->stack_depth);
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ const s32 imm32 = insn->imm;
+ const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
+ const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
+ const u8 code = insn->code;
+ const u8 *dst = bpf2ia32[insn->dst_reg];
+ const u8 *src = bpf2ia32[insn->src_reg];
+ const u8 *r0 = bpf2ia32[BPF_REG_0];
+ s64 jmp_offset;
+ u8 jmp_cond;
+ int ilen;
+ u8 *func;
+
+ switch (code) {
+ /* ALU operations */
+ /* dst = src */
+ case BPF_ALU | BPF_MOV | BPF_K:
+ case BPF_ALU | BPF_MOV | BPF_X:
+ case BPF_ALU64 | BPF_MOV | BPF_K:
+ case BPF_ALU64 | BPF_MOV | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_mov_r64(is64, dst, src, dstk,
+ sstk, &prog);
+ break;
+ case BPF_K:
+ /* Sign-extend immediate value to dst reg */
+ emit_ia32_mov_i64(is64, dst, imm32,
+ dstk, &prog);
+ break;
+ }
+ break;
+ /* dst = dst + src/imm */
+ /* dst = dst - src/imm */
+ /* dst = dst | src/imm */
+ /* dst = dst & src/imm */
+ /* dst = dst ^ src/imm */
+ /* dst = dst * src/imm */
+ /* dst = dst << src */
+ /* dst = dst >> src */
+ case BPF_ALU | BPF_ADD | BPF_K:
+ case BPF_ALU | BPF_ADD | BPF_X:
+ case BPF_ALU | BPF_SUB | BPF_K:
+ case BPF_ALU | BPF_SUB | BPF_X:
+ case BPF_ALU | BPF_OR | BPF_K:
+ case BPF_ALU | BPF_OR | BPF_X:
+ case BPF_ALU | BPF_AND | BPF_K:
+ case BPF_ALU | BPF_AND | BPF_X:
+ case BPF_ALU | BPF_XOR | BPF_K:
+ case BPF_ALU | BPF_XOR | BPF_X:
+ case BPF_ALU64 | BPF_ADD | BPF_K:
+ case BPF_ALU64 | BPF_ADD | BPF_X:
+ case BPF_ALU64 | BPF_SUB | BPF_K:
+ case BPF_ALU64 | BPF_SUB | BPF_X:
+ case BPF_ALU64 | BPF_OR | BPF_K:
+ case BPF_ALU64 | BPF_OR | BPF_X:
+ case BPF_ALU64 | BPF_AND | BPF_K:
+ case BPF_ALU64 | BPF_AND | BPF_X:
+ case BPF_ALU64 | BPF_XOR | BPF_K:
+ case BPF_ALU64 | BPF_XOR | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_alu_r64(is64, BPF_OP(code), dst,
+ src, dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ emit_ia32_alu_i64(is64, BPF_OP(code), dst,
+ imm32, dstk, &prog);
+ break;
+ }
+ break;
+ case BPF_ALU | BPF_MUL | BPF_K:
+ case BPF_ALU | BPF_MUL | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_mul_r(dst_lo, src_lo, dstk,
+ sstk, &prog);
+ break;
+ case BPF_K:
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+ imm32);
+ emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
+ false, &prog);
+ break;
+ }
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ case BPF_ALU | BPF_LSH | BPF_X:
+ case BPF_ALU | BPF_RSH | BPF_X:
+ case BPF_ALU | BPF_ARSH | BPF_K:
+ case BPF_ALU | BPF_ARSH | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
+ dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+ imm32);
+ emit_ia32_shift_r(BPF_OP(code), dst_lo,
+ IA32_ECX, dstk, false,
+ &prog);
+ break;
+ }
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ /* dst = dst / src(imm) */
+ /* dst = dst % src(imm) */
+ case BPF_ALU | BPF_DIV | BPF_K:
+ case BPF_ALU | BPF_DIV | BPF_X:
+ case BPF_ALU | BPF_MOD | BPF_K:
+ case BPF_ALU | BPF_MOD | BPF_X:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
+ src_lo, dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
+ imm32);
+ emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
+ IA32_ECX, dstk, false,
+ &prog);
+ break;
+ }
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ case BPF_ALU64 | BPF_DIV | BPF_K:
+ case BPF_ALU64 | BPF_DIV | BPF_X:
+ case BPF_ALU64 | BPF_MOD | BPF_K:
+ case BPF_ALU64 | BPF_MOD | BPF_X:
+ goto notyet;
+ /* dst = dst >> imm */
+ /* dst = dst << imm */
+ case BPF_ALU | BPF_RSH | BPF_K:
+ case BPF_ALU | BPF_LSH | BPF_K:
+ if (unlikely(imm32 > 31))
+ return -EINVAL;
+ /* mov ecx,imm32*/
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+ emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
+ false, &prog);
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ /* dst = dst << imm */
+ case BPF_ALU64 | BPF_LSH | BPF_K:
+ if (unlikely(imm32 > 63))
+ return -EINVAL;
+ emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = dst >> imm */
+ case BPF_ALU64 | BPF_RSH | BPF_K:
+ if (unlikely(imm32 > 63))
+ return -EINVAL;
+ emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = dst << src */
+ case BPF_ALU64 | BPF_LSH | BPF_X:
+ emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
+ break;
+ /* dst = dst >> src */
+ case BPF_ALU64 | BPF_RSH | BPF_X:
+ emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
+ break;
+ /* dst = dst >> src (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_X:
+ emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
+ break;
+ /* dst = dst >> imm (signed) */
+ case BPF_ALU64 | BPF_ARSH | BPF_K:
+ if (unlikely(imm32 > 63))
+ return -EINVAL;
+ emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = ~dst */
+ case BPF_ALU | BPF_NEG:
+ emit_ia32_alu_i(is64, false, BPF_OP(code),
+ dst_lo, 0, dstk, &prog);
+ emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
+ break;
+ /* dst = ~dst (64 bit) */
+ case BPF_ALU64 | BPF_NEG:
+ emit_ia32_neg64(dst, dstk, &prog);
+ break;
+ /* dst = dst * src/imm */
+ case BPF_ALU64 | BPF_MUL | BPF_X:
+ case BPF_ALU64 | BPF_MUL | BPF_K:
+ switch (BPF_SRC(code)) {
+ case BPF_X:
+ emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
+ break;
+ case BPF_K:
+ emit_ia32_mul_i64(dst, imm32, dstk, &prog);
+ break;
+ }
+ break;
+ /* dst = htole(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = htobe(dst) */
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
+ break;
+ /* dst = imm64 */
+ case BPF_LD | BPF_IMM | BPF_DW: {
+ s32 hi, lo = imm32;
+
+ hi = insn[1].imm;
+ emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
+ emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
+ insn++;
+ i++;
+ break;
+ }
+ /* ST: *(u8*)(dst_reg + off) = imm */
+ case BPF_ST | BPF_MEM | BPF_H:
+ case BPF_ST | BPF_MEM | BPF_B:
+ case BPF_ST | BPF_MEM | BPF_W:
+ case BPF_ST | BPF_MEM | BPF_DW:
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ EMIT(0xC6, 1); break;
+ case BPF_H:
+ EMIT2(0x66, 0xC7); break;
+ case BPF_W:
+ case BPF_DW:
+ EMIT(0xC7, 1); break;
+ }
+
+ if (is_imm8(insn->off))
+ EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
+ else
+ EMIT1_off32(add_1reg(0x80, IA32_EAX),
+ insn->off);
+ EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
+
+ if (BPF_SIZE(code) == BPF_DW) {
+ u32 hi;
+
+ hi = imm32 & (1<<31) ? (u32)~0 : 0;
+ EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
+ insn->off + 4);
+ EMIT(hi, 4);
+ }
+ break;
+
+ /* STX: *(u8*)(dst_reg + off) = src_reg */
+ case BPF_STX | BPF_MEM | BPF_B:
+ case BPF_STX | BPF_MEM | BPF_H:
+ case BPF_STX | BPF_MEM | BPF_W:
+ case BPF_STX | BPF_MEM | BPF_DW:
+ if (dstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov eax,dst_lo */
+ EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
+
+ if (sstk)
+ /* mov edx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(src_lo));
+ else
+ /* mov edx,src_lo */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ EMIT(0x88, 1); break;
+ case BPF_H:
+ EMIT2(0x66, 0x89); break;
+ case BPF_W:
+ case BPF_DW:
+ EMIT(0x89, 1); break;
+ }
+
+ if (is_imm8(insn->off))
+ EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
+ insn->off);
+ else
+ EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
+ insn->off);
+
+ if (BPF_SIZE(code) == BPF_DW) {
+ if (sstk)
+ /* mov edi,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(src_hi));
+ else
+ /* mov edi,src_hi */
+ EMIT2(0x8B, add_2reg(0xC0, src_hi,
+ IA32_EDX));
+ EMIT1(0x89);
+ if (is_imm8(insn->off + 4)) {
+ EMIT2(add_2reg(0x40, IA32_EAX,
+ IA32_EDX),
+ insn->off + 4);
+ } else {
+ EMIT1(add_2reg(0x80, IA32_EAX,
+ IA32_EDX));
+ EMIT(insn->off + 4, 4);
+ }
+ }
+ break;
+
+ /* LDX: dst_reg = *(u8*)(src_reg + off) */
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ if (sstk)
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(src_lo));
+ else
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
+
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ EMIT2(0x0F, 0xB6); break;
+ case BPF_H:
+ EMIT2(0x0F, 0xB7); break;
+ case BPF_W:
+ case BPF_DW:
+ EMIT(0x8B, 1); break;
+ }
+
+ if (is_imm8(insn->off))
+ EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
+ insn->off);
+ else
+ EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
+ insn->off);
+
+ if (dstk)
+ /* mov dword ptr [ebp+off],edx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_lo));
+ else
+ /* mov dst_lo,edx */
+ EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
+ switch (BPF_SIZE(code)) {
+ case BPF_B:
+ case BPF_H:
+ case BPF_W:
+ if (dstk) {
+ EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
+ STACK_VAR(dst_hi));
+ EMIT(0x0, 4);
+ } else {
+ EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
+ }
+ break;
+ case BPF_DW:
+ EMIT2_off32(0x8B,
+ add_2reg(0x80, IA32_EAX, IA32_EDX),
+ insn->off + 4);
+ if (dstk)
+ EMIT3(0x89,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
+ else
+ EMIT2(0x89,
+ add_2reg(0xC0, dst_hi, IA32_EDX));
+ break;
+ default:
+ break;
+ }
+ break;
+ /* call */
+ case BPF_JMP | BPF_CALL:
+ {
+ const u8 *r1 = bpf2ia32[BPF_REG_1];
+ const u8 *r2 = bpf2ia32[BPF_REG_2];
+ const u8 *r3 = bpf2ia32[BPF_REG_3];
+ const u8 *r4 = bpf2ia32[BPF_REG_4];
+ const u8 *r5 = bpf2ia32[BPF_REG_5];
+
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ goto notyet;
+
+ func = (u8 *) __bpf_call_base + imm32;
+ jmp_offset = func - (image + addrs[i]);
+
+ if (!imm32 || !is_simm32(jmp_offset)) {
+ pr_err("unsupported BPF func %d addr %p image %p\n",
+ imm32, func, image);
+ return -EINVAL;
+ }
+
+ /* mov eax,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(r1[0]));
+ /* mov edx,dword ptr [ebp+off] */
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(r1[1]));
+
+ emit_push_r64(r5, &prog);
+ emit_push_r64(r4, &prog);
+ emit_push_r64(r3, &prog);
+ emit_push_r64(r2, &prog);
+
+ EMIT1_off32(0xE8, jmp_offset + 9);
+
+ /* mov dword ptr [ebp+off],eax */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(r0[0]));
+ /* mov dword ptr [ebp+off],edx */
+ EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(r0[1]));
+
+ /* add esp,32 */
+ EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
+ break;
+ }
+ case BPF_JMP | BPF_TAIL_CALL:
+ emit_bpf_tail_call(&prog);
+ break;
+
+ /* cond jump */
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X: {
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+ u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
+ STACK_VAR(src_hi));
+ }
+
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 2);
+ /* cmp dreg_lo,sreg_lo */
+ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+ goto emit_cond_jmp;
+ }
+ case BPF_JMP | BPF_JSET | BPF_X: {
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+ u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
+ STACK_VAR(src_hi));
+ }
+ /* and dreg_lo,sreg_lo */
+ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ goto emit_cond_jmp;
+ }
+ case BPF_JMP | BPF_JSET | BPF_K: {
+ u32 hi;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = IA32_ECX;
+ u8 sreg_hi = IA32_EBX;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+ hi = imm32 & (1<<31) ? (u32)~0 : 0;
+
+ /* mov ecx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+ /* mov ebx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+
+ /* and dreg_lo,sreg_lo */
+ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
+ /* and dreg_hi,sreg_hi */
+ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
+ /* or dreg_lo,dreg_hi */
+ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
+ goto emit_cond_jmp;
+ }
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K: {
+ u32 hi;
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = IA32_ECX;
+ u8 sreg_hi = IA32_EBX;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ hi = imm32 & (1<<31) ? (u32)~0 : 0;
+ /* mov ecx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+ /* mov ebx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 2);
+ /* cmp dreg_lo,sreg_lo */
+ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+
+emit_cond_jmp: /* Convert BPF opcode to x86 */
+ switch (BPF_OP(code)) {
+ case BPF_JEQ:
+ jmp_cond = IA32_JE;
+ break;
+ case BPF_JSET:
+ case BPF_JNE:
+ jmp_cond = IA32_JNE;
+ break;
+ case BPF_JGT:
+ /* GT is unsigned '>', JA in x86 */
+ jmp_cond = IA32_JA;
+ break;
+ case BPF_JLT:
+ /* LT is unsigned '<', JB in x86 */
+ jmp_cond = IA32_JB;
+ break;
+ case BPF_JGE:
+ /* GE is unsigned '>=', JAE in x86 */
+ jmp_cond = IA32_JAE;
+ break;
+ case BPF_JLE:
+ /* LE is unsigned '<=', JBE in x86 */
+ jmp_cond = IA32_JBE;
+ break;
+ case BPF_JSGT:
+ /* Signed '>', GT in x86 */
+ jmp_cond = IA32_JG;
+ break;
+ case BPF_JSLT:
+ /* Signed '<', LT in x86 */
+ jmp_cond = IA32_JL;
+ break;
+ case BPF_JSGE:
+ /* Signed '>=', GE in x86 */
+ jmp_cond = IA32_JGE;
+ break;
+ case BPF_JSLE:
+ /* Signed '<=', LE in x86 */
+ jmp_cond = IA32_JLE;
+ break;
+ default: /* to silence GCC warning */
+ return -EFAULT;
+ }
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (is_imm8(jmp_offset)) {
+ EMIT2(jmp_cond, jmp_offset);
+ } else if (is_simm32(jmp_offset)) {
+ EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+ } else {
+ pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
+
+ break;
+ }
+ case BPF_JMP | BPF_JA:
+ if (insn->off == -1)
+ /* -1 jmp instructions will always jump
+ * backwards two bytes. Explicitly handling
+ * this case avoids wasting too many passes
+ * when there are long sequences of replaced
+ * dead code.
+ */
+ jmp_offset = -2;
+ else
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+
+ if (!jmp_offset)
+ /* Optimize out nop jumps */
+ break;
+emit_jmp:
+ if (is_imm8(jmp_offset)) {
+ EMIT2(0xEB, jmp_offset);
+ } else if (is_simm32(jmp_offset)) {
+ EMIT1_off32(0xE9, jmp_offset);
+ } else {
+ pr_err("jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
+ break;
+ /* STX XADD: lock *(u32 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_W:
+ /* STX XADD: lock *(u64 *)(dst + off) += src */
+ case BPF_STX | BPF_XADD | BPF_DW:
+ goto notyet;
+ case BPF_JMP | BPF_EXIT:
+ if (seen_exit) {
+ jmp_offset = ctx->cleanup_addr - addrs[i];
+ goto emit_jmp;
+ }
+ seen_exit = true;
+ /* Update cleanup_addr */
+ ctx->cleanup_addr = proglen;
+ emit_epilogue(&prog, bpf_prog->aux->stack_depth);
+ break;
+notyet:
+ pr_info_once("*** NOT YET: opcode %02x ***\n", code);
+ return -EFAULT;
+ default:
+ /*
+ * This error will be seen if new instruction was added
+ * to interpreter, but not to JIT or if there is junk in
+ * bpf_prog
+ */
+ pr_err("bpf_jit: unknown opcode %02x\n", code);
+ return -EINVAL;
+ }
+
+ ilen = prog - temp;
+ if (ilen > BPF_MAX_INSN_SIZE) {
+ pr_err("bpf_jit: fatal insn size error\n");
+ return -EFAULT;
+ }
+
+ if (image) {
+ if (unlikely(proglen + ilen > oldproglen)) {
+ pr_err("bpf_jit: fatal error\n");
+ return -EFAULT;
+ }
+ memcpy(image + proglen, temp, ilen);
+ }
+ proglen += ilen;
+ addrs[i] = proglen;
+ prog = temp;
+ }
+ return proglen;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+ struct bpf_binary_header *header = NULL;
+ struct bpf_prog *tmp, *orig_prog = prog;
+ int proglen, oldproglen = 0;
+ struct jit_context ctx = {};
+ bool tmp_blinded = false;
+ u8 *image = NULL;
+ int *addrs;
+ int pass;
+ int i;
+
+ if (!prog->jit_requested)
+ return orig_prog;
+
+ tmp = bpf_jit_blind_constants(prog);
+ /*
+ * If blinding was requested and we failed during blinding,
+ * we must fall back to the interpreter.
+ */
+ if (IS_ERR(tmp))
+ return orig_prog;
+ if (tmp != prog) {
+ tmp_blinded = true;
+ prog = tmp;
+ }
+
+ addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
+ if (!addrs) {
+ prog = orig_prog;
+ goto out;
+ }
+
+ /*
+ * Before first pass, make a rough estimation of addrs[]
+ * each BPF instruction is translated to less than 64 bytes
+ */
+ for (proglen = 0, i = 0; i < prog->len; i++) {
+ proglen += 64;
+ addrs[i] = proglen;
+ }
+ ctx.cleanup_addr = proglen;
+
+ /*
+ * JITed image shrinks with every pass and the loop iterates
+ * until the image stops shrinking. Very large BPF programs
+ * may converge on the last pass. In such case do one more
+ * pass to emit the final image.
+ */
+ for (pass = 0; pass < 20 || image; pass++) {
+ proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
+ if (proglen <= 0) {
+out_image:
+ image = NULL;
+ if (header)
+ bpf_jit_binary_free(header);
+ prog = orig_prog;
+ goto out_addrs;
+ }
+ if (image) {
+ if (proglen != oldproglen) {
+ pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
+ proglen, oldproglen);
+ goto out_image;
+ }
+ break;
+ }
+ if (proglen == oldproglen) {
+ header = bpf_jit_binary_alloc(proglen, &image,
+ 1, jit_fill_hole);
+ if (!header) {
+ prog = orig_prog;
+ goto out_addrs;
+ }
+ }
+ oldproglen = proglen;
+ cond_resched();
+ }
+
+ if (bpf_jit_enable > 1)
+ bpf_jit_dump(prog->len, proglen, pass + 1, image);
+
+ if (image) {
+ bpf_jit_binary_lock_ro(header);
+ prog->bpf_func = (void *)image;
+ prog->jited = 1;
+ prog->jited_len = proglen;
+ } else {
+ prog = orig_prog;
+ }
+
+out_addrs:
+ kfree(addrs);
+out:
+ if (tmp_blinded)
+ bpf_jit_prog_release_other(prog, prog == orig_prog ?
+ tmp : orig_prog);
+ return prog;
+}