Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/boot/dts/am571x-idk.dts            27
-rw-r--r--  arch/arm/boot/dts/am572x-idk.dts             5
-rw-r--r--  arch/arm/boot/dts/am574x-idk.dts             5
-rw-r--r--  arch/arm/boot/dts/am57xx-idk-common.dtsi     5
-rw-r--r--  arch/arm/boot/dts/dra7-l4.dtsi              52
-rw-r--r--  arch/arm/configs/omap2plus_defconfig         1
-rw-r--r--  arch/arm/mach-pxa/icontrol.c                 9
-rw-r--r--  arch/arm/mach-pxa/zeus.c                     9
-rw-r--r--  arch/powerpc/include/asm/local.h             2
-rw-r--r--  arch/s390/net/bpf_jit_comp.c               502
-rw-r--r--  arch/x86/include/asm/text-patching.h        24
-rw-r--r--  arch/x86/kernel/alternative.c              132
-rw-r--r--  arch/x86/kernel/jump_label.c                 9
-rw-r--r--  arch/x86/kernel/kprobes/opt.c               11
-rw-r--r--  arch/x86/mm/Makefile                         2
-rw-r--r--  arch/x86/mm/maccess.c                       43
-rw-r--r--  arch/x86/net/bpf_jit_comp.c                620
17 files changed, 1200 insertions(+), 258 deletions(-)
diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index 0aaacea1d887..820ce3b60bb6 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -186,3 +186,30 @@
pinctrl-1 = <&mmc2_pins_hs>;
pinctrl-2 = <&mmc2_pins_ddr_rev20 &mmc2_iodelay_ddr_conf>;
};
+
+&mac_sw {
+ pinctrl-names = "default", "sleep";
+ status = "okay";
+};
+
+&cpsw_port1 {
+ phy-handle = <&ethphy0_sw>;
+ phy-mode = "rgmii";
+ ti,dual-emac-pvid = <1>;
+};
+
+&cpsw_port2 {
+ phy-handle = <&ethphy1_sw>;
+ phy-mode = "rgmii";
+ ti,dual-emac-pvid = <2>;
+};
+
+&davinci_mdio_sw {
+ ethphy0_sw: ethernet-phy@0 {
+ reg = <0>;
+ };
+
+ ethphy1_sw: ethernet-phy@1 {
+ reg = <1>;
+ };
+};
diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index ea1c119feaa5..c3d966904d64 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -27,3 +27,8 @@
pinctrl-1 = <&mmc2_pins_hs>;
pinctrl-2 = <&mmc2_pins_ddr_rev20>;
};
+
+&mac {
+ status = "okay";
+ dual_emac;
+};
diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts
index 7935d70874ce..fa0088025b2c 100644
--- a/arch/arm/boot/dts/am574x-idk.dts
+++ b/arch/arm/boot/dts/am574x-idk.dts
@@ -35,3 +35,8 @@
pinctrl-1 = <&mmc2_pins_default>;
pinctrl-2 = <&mmc2_pins_default>;
};
+
+&mac {
+ status = "okay";
+ dual_emac;
+};
diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi
index 423855a2a2d6..398721c7201c 100644
--- a/arch/arm/boot/dts/am57xx-idk-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi
@@ -363,11 +363,6 @@
ext-clk-src;
};
-&mac {
- status = "okay";
- dual_emac;
-};
-
&cpsw_emac0 {
phy-handle = <&ethphy0>;
phy-mode = "rgmii";
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 5cac2dd58241..37e048771b0f 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -3079,6 +3079,58 @@
phys = <&phy_gmii_sel 2>;
};
};
+
+ mac_sw: switch@0 {
+ compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch";
+ reg = <0x0 0x4000>;
+ ranges = <0 0 0x4000>;
+ clocks = <&gmac_main_clk>;
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ syscon = <&scm_conf>;
+ status = "disabled";
+
+ interrupts = <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "rx_thresh", "rx", "tx", "misc";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpsw_port1: port@1 {
+ reg = <1>;
+ label = "port1";
+ mac-address = [ 00 00 00 00 00 00 ];
+ phys = <&phy_gmii_sel 1>;
+ };
+
+ cpsw_port2: port@2 {
+ reg = <2>;
+ label = "port2";
+ mac-address = [ 00 00 00 00 00 00 ];
+ phys = <&phy_gmii_sel 2>;
+ };
+ };
+
+ davinci_mdio_sw: mdio@1000 {
+ compatible = "ti,cpsw-mdio","ti,davinci_mdio";
+ clocks = <&gmac_main_clk>;
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ bus_freq = <1000000>;
+ reg = <0x1000 0x100>;
+ };
+
+ cpts {
+ clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>;
+ clock-names = "cpts";
+ };
+ };
};
};
};
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 40d7f1a4fc45..89cce8d4bc6b 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -554,3 +554,4 @@ CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_SCHEDSTATS=y
# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_TI_CPSW_SWITCHDEV=y
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 865b10344ea2..0474a4b1394d 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -12,6 +12,7 @@
#include <linux/irq.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/gpio.h>
#include <asm/mach-types.h>
@@ -22,7 +23,6 @@
#include <linux/spi/spi.h>
#include <linux/spi/pxa2xx_spi.h>
-#include <linux/can/platform/mcp251x.h>
#include <linux/regulator/machine.h>
#include "generic.h"
@@ -69,8 +69,9 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
.gpio_cs = ICONTROL_MCP251x_nCS4
};
-static struct mcp251x_platform_data mcp251x_info = {
- .oscillator_frequency = 16E6,
+static const struct property_entry mcp251x_properties[] = {
+ PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+ {}
};
static struct spi_board_info mcp251x_board_info[] = {
@@ -79,7 +80,7 @@ static struct spi_board_info mcp251x_board_info[] = {
.max_speed_hz = 6500000,
.bus_num = 3,
.chip_select = 0,
- .platform_data = &mcp251x_info,
+ .properties = mcp251x_properties,
.controller_data = &mcp251x_chip_info1,
.irq = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ1)
},
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index da113c8eefbf..b27fc7ac9cea 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -13,6 +13,7 @@
#include <linux/leds.h>
#include <linux/irq.h>
#include <linux/pm.h>
+#include <linux/property.h>
#include <linux/gpio.h>
#include <linux/gpio/machine.h>
#include <linux/serial_8250.h>
@@ -27,7 +28,6 @@
#include <linux/platform_data/i2c-pxa.h>
#include <linux/platform_data/pca953x.h>
#include <linux/apm-emulation.h>
-#include <linux/can/platform/mcp251x.h>
#include <linux/regulator/fixed.h>
#include <linux/regulator/machine.h>
@@ -428,14 +428,15 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = {
},
};
-static struct mcp251x_platform_data zeus_mcp2515_pdata = {
- .oscillator_frequency = 16*1000*1000,
+static const struct property_entry mcp251x_properties[] = {
+ PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+ {}
};
static struct spi_board_info zeus_spi_board_info[] = {
[0] = {
.modalias = "mcp2515",
- .platform_data = &zeus_mcp2515_pdata,
+ .properties = mcp251x_properties,
.irq = PXA_GPIO_TO_IRQ(ZEUS_CAN_GPIO),
.max_speed_hz = 1*1000*1000,
.bus_num = 3,
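Both PXA board files above move the MCP2515 oscillator rate from mcp251x_platform_data to a generic "clock-frequency" device property. On the consumer side, a driver reads such a property back through the device-property API; a minimal sketch follows (illustrative probe excerpt, not the actual mcp251x driver code):

#include <linux/property.h>
#include <linux/spi/spi.h>

/* Hypothetical helper: fetch the board-supplied oscillator frequency. */
static int mcp251x_get_osc_freq(struct spi_device *spi, u32 *freq)
{
	/* Matches PROPERTY_ENTRY_U32("clock-frequency", ...) set above */
	return device_property_read_u32(&spi->dev, "clock-frequency", freq);
}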
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index fdd00939270b..bc4bd19b7fc2 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -17,7 +17,7 @@ typedef struct
#define LOCAL_INIT(i) { (i) }
-static __inline__ long local_read(local_t *l)
+static __inline__ long local_read(const local_t *l)
{
return READ_ONCE(l->v);
}
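With the const qualifier added above, read-only helpers can take a pointer-to-const counter without a cast; a minimal sketch of a caller this now permits:

/* Hypothetical observer that never modifies the counter: */
static long peek_count(const local_t *l)
{
	return local_read(l);	/* compiles without discarding const */
}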
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index ce88211b9c6c..8d2134136290 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -23,6 +23,8 @@
#include <linux/filter.h>
#include <linux/init.h>
#include <linux/bpf.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
#include <asm/facility.h>
@@ -38,10 +40,11 @@ struct bpf_jit {
int size; /* Size of program and literal pool */
int size_prg; /* Size of program */
int prg; /* Current position in program */
- int lit_start; /* Start of literal pool */
- int lit; /* Current position in literal pool */
+ int lit32_start; /* Start of 32-bit literal pool */
+ int lit32; /* Current position in 32-bit literal pool */
+ int lit64_start; /* Start of 64-bit literal pool */
+ int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
- int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
@@ -49,14 +52,10 @@ struct bpf_jit {
int labels[1]; /* Labels for local jumps */
};
-#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
-
-#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */
-#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL (1 << 2) /* code uses literals */
-#define SEEN_FUNC (1 << 3) /* calls C functions */
-#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */
-#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */
+#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
+#define SEEN_LITERAL BIT(1) /* code uses literals */
+#define SEEN_FUNC BIT(2) /* calls C functions */
+#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
@@ -131,13 +130,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT2(op) \
({ \
if (jit->prg_buf) \
- *(u16 *) (jit->prg_buf + jit->prg) = op; \
+ *(u16 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 2; \
})
#define EMIT2(op, b1, b2) \
({ \
- _EMIT2(op | reg(b1, b2)); \
+ _EMIT2((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -145,20 +144,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT4(op) \
({ \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->prg) = op; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 4; \
})
#define EMIT4(op, b1, b2) \
({ \
- _EMIT4(op | reg(b1, b2)); \
+ _EMIT4((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT4_RRF(op, b1, b2, b3) \
({ \
- _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \
+ _EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
REG_SET_SEEN(b3); \
@@ -167,13 +166,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT4_DISP(op, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT4(op | __disp); \
+ _EMIT4((op) | __disp); \
})
#define EMIT4_DISP(op, b1, b2, disp) \
({ \
- _EMIT4_DISP(op | reg_high(b1) << 16 | \
- reg_high(b2) << 8, disp); \
+ _EMIT4_DISP((op) | reg_high(b1) << 16 | \
+ reg_high(b2) << 8, (disp)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -181,21 +180,27 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_IMM(op, b1, imm) \
({ \
unsigned int __imm = (imm) & 0xffff; \
- _EMIT4(op | reg_high(b1) << 16 | __imm); \
+ _EMIT4((op) | reg_high(b1) << 16 | __imm); \
REG_SET_SEEN(b1); \
})
#define EMIT4_PCREL(op, pcrel) \
({ \
long __pcrel = ((pcrel) >> 1) & 0xffff; \
- _EMIT4(op | __pcrel); \
+ _EMIT4((op) | __pcrel); \
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target) \
+({ \
+ int __rel = ((target) - jit->prg) / 2; \
+ _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
#define _EMIT6(op1, op2) \
({ \
if (jit->prg_buf) { \
- *(u32 *) (jit->prg_buf + jit->prg) = op1; \
- *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op1); \
+ *(u16 *) (jit->prg_buf + jit->prg + 4) = (op2); \
} \
jit->prg += 6; \
})
@@ -203,20 +208,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT6_DISP(op1, op2, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT6(op1 | __disp, op2); \
+ _EMIT6((op1) | __disp, op2); \
})
#define _EMIT6_DISP_LH(op1, op2, disp) \
({ \
- u32 _disp = (u32) disp; \
+ u32 _disp = (u32) (disp); \
unsigned int __disp_h = _disp & 0xff000; \
unsigned int __disp_l = _disp & 0x00fff; \
- _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
+ _EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4); \
})
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \
({ \
- _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \
+ _EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 | \
reg_high(b3) << 8, op2, disp); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
@@ -226,8 +231,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
- op2 | mask << 12); \
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
+ (op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -235,68 +240,83 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
- (rel & 0xffff), op2 | (imm & 0xff) << 8); \
+ _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
+ (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
- BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
+ BUILD_BUG_ON(((unsigned long) (imm)) > 0xff); \
})
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
/* Branch instruction needs 6 bytes */ \
- int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
+ int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT6_PCREL_RILB(op, b, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
REG_SET_SEEN(b); \
})
#define EMIT6_PCREL_RIL(op, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | rel >> 16, rel & 0xffff); \
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target) \
+({ \
+ EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
})
#define _EMIT6_IMM(op, imm) \
({ \
unsigned int __imm = (imm); \
- _EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+ _EMIT6((op) | (__imm >> 16), __imm & 0xffff); \
})
#define EMIT6_IMM(op, b1, imm) \
({ \
- _EMIT6_IMM(op | reg_high(b1) << 16, imm); \
+ _EMIT6_IMM((op) | reg_high(b1) << 16, imm); \
REG_SET_SEEN(b1); \
})
-#define EMIT_CONST_U32(val) \
+#define _EMIT_CONST_U32(val) \
({ \
unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
- jit->seen |= SEEN_LITERAL; \
+ ret = jit->lit32; \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \
- jit->lit += 4; \
+ *(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+ jit->lit32 += 4; \
ret; \
})
-#define EMIT_CONST_U64(val) \
+#define EMIT_CONST_U32(val) \
({ \
- unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
jit->seen |= SEEN_LITERAL; \
+ _EMIT_CONST_U32(val) - jit->base_ip; \
+})
+
+#define _EMIT_CONST_U64(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit64; \
if (jit->prg_buf) \
- *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \
- jit->lit += 8; \
+ *(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+ jit->lit64 += 8; \
ret; \
})
+#define EMIT_CONST_U64(val) \
+({ \
+ jit->seen |= SEEN_LITERAL; \
+ _EMIT_CONST_U64(val) - jit->base_ip; \
+})
+
#define EMIT_ZERO(b1) \
({ \
if (!fp->aux->verifier_zext) { \
@@ -307,6 +327,67 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
})
/*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+ return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+ return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+ return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+ return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+ return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
+
+/*
* Fill whole space with illegal instructions
*/
static void jit_fill_hole(void *area, unsigned int size)
@@ -383,9 +464,18 @@ static int get_end(struct bpf_jit *jit, int start)
*/
static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
{
-
+ const int last = 15, save_restore_size = 6;
int re = 6, rs;
+ if (is_first_pass(jit)) {
+ /*
+ * We don't know yet which registers are used. Reserve space
+ * conservatively.
+ */
+ jit->prg += (last - re + 1) * save_restore_size;
+ return;
+ }
+
do {
rs = get_start(jit, re);
if (!rs)
@@ -396,7 +486,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
else
restore_regs(jit, rs, re, stack_depth);
re++;
- } while (re <= 15);
+ } while (re <= last);
}
/*
@@ -420,21 +510,28 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
/* Save registers */
save_restore_regs(jit, REGS_SAVE, stack_depth);
/* Setup literal pool */
- if (jit->seen & SEEN_LITERAL) {
- /* basr %r13,0 */
- EMIT2(0x0d00, REG_L, REG_0);
- jit->base_ip = jit->prg;
+ if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+ if (!is_first_pass(jit) &&
+ is_valid_ldisp(jit->size - (jit->prg + 2))) {
+ /* basr %l,0 */
+ EMIT2(0x0d00, REG_L, REG_0);
+ jit->base_ip = jit->prg;
+ } else {
+ /* larl %l,lit32_start */
+ EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+ jit->base_ip = jit->lit32_start;
+ }
}
/* Setup stack and backchain */
- if (jit->seen & SEEN_STACK) {
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* lgr %w1,%r15 (backchain) */
EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* stg %w1,152(%r15) (backchain) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
@@ -446,12 +543,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
*/
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
- /* Return 0 */
- if (jit->seen & SEEN_RET0) {
- jit->ret0_ip = jit->prg;
- /* lghi %b0,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
- }
jit->exit_ip = jit->prg;
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
@@ -476,7 +567,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
_EMIT2(0x07fe);
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
- (jit->seen & SEEN_FUNC)) {
+ (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
jit->r1_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r1 thunk */
if (test_facility(35)) {
@@ -506,16 +597,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int i, bool extra_pass)
{
struct bpf_insn *insn = &fp->insnsi[i];
- int jmp_off, last, insn_count = 1;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
+ int last, insn_count = 1;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
unsigned int mask;
- if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
- jit->seen |= SEEN_REG_AX;
switch (insn->code) {
/*
* BPF_MOV
@@ -549,9 +638,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
u64 imm64;
imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
- /* lg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm64));
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
insn_count = 2;
break;
}
@@ -680,9 +768,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
EMIT2(0x1800, REG_W1, dst_reg);
- /* dl %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
- EMIT_CONST_U32(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U32(imm));
+ } else {
+ /* lgfrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlr %w0,%dst */
+ EMIT4(0xb9970000, REG_W0, dst_reg);
+ }
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
@@ -704,9 +801,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
- /* dlg %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, dst_reg);
+ }
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -729,9 +835,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
- /* ng %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* ng %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0080,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ngr %dst,%w0 */
+ EMIT4(0xb9800000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_OR
@@ -751,9 +867,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
- /* og %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* og %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0081,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ogr %dst,%w0 */
+ EMIT4(0xb9810000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_XOR
@@ -775,9 +901,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
- /* xg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* xg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0082,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* xgr %dst,%w0 */
+ EMIT4(0xb9820000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_LSH
@@ -1023,9 +1159,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
- /* lg %w1,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
- EMIT_CONST_U64(func));
+ /* lgrl %w1,func */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
/* brasl %r14,__s390_indirect_jump_r1 */
EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
@@ -1054,9 +1189,17 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* llgf %w1,map.max_entries(%b2) */
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
offsetof(struct bpf_array, map.max_entries));
- /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
- EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
- REG_W1, 0, 0xa);
+ /* if ((u32)%b3 >= (u32)%w1) goto out; */
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* clrj %b3,%w1,0xa,label0 */
+ EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
+ REG_W1, 0, 0xa);
+ } else {
+ /* clr %b3,%w1 */
+ EMIT2(0x1500, BPF_REG_3, REG_W1);
+ /* brcl 0xa,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]);
+ }
/*
* if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1071,9 +1214,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
- MAX_TAIL_CALL_CNT, 0, 0x2);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+ MAX_TAIL_CALL_CNT, 0, 0x2);
+ } else {
+ /* clfi %w1,MAX_TAIL_CALL_CNT */
+ EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT);
+ /* brcl 0x2,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]);
+ }
/*
* prog = array->ptrs[index];
@@ -1085,11 +1235,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9160000, REG_1, BPF_REG_3);
/* sllg %r1,%r1,3: %r1 *= 8 */
EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
- /* lg %r1,prog(%b2,%r1) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+ /* ltg %r1,prog(%b2,%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
REG_1, offsetof(struct bpf_array, ptrs));
- /* clgij %r1,0,0x8,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* brc 0x8,label0 */
+ EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]);
+ } else {
+ /* brcl 0x8,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]);
+ }
/*
* Restore registers before calling function
@@ -1110,7 +1265,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
break;
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
- if (last && !(jit->seen & SEEN_RET0))
+ if (last)
break;
/* j <exit> */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
@@ -1246,36 +1401,83 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
goto branch_oc;
branch_ks:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* crj or cgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, REG_W1, i, off, mask);
+ /* cfi or cgfi %dst,imm */
+ EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+ dst_reg, imm);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_ku:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* clrj or clgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, REG_W1, i, off, mask);
+ /* clfi or clgfi %dst,imm */
+ EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
+ dst_reg, imm);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_xs:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* crj or cgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* crj or cgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* cr or cgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1900, dst_reg, src_reg);
+ else
+ EMIT4(0xb9200000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_xu:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* clrj or clgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* clrj or clgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* clr or clgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1500, dst_reg, src_reg);
+ else
+ EMIT4(0xb9210000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_oc:
- /* brc mask,jmp_off (branch instruction needs 4 bytes) */
- jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
- EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
}
default: /* too complex, give up */
@@ -1286,28 +1488,67 @@ branch_oc:
}
/*
+ * Return whether the new address of the i-th instruction satisfies all invariants
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+ /* On the first pass anything goes */
+ if (is_first_pass(jit))
+ return true;
+
+ /* The codegen pass must not change anything */
+ if (is_codegen_pass(jit))
+ return jit->addrs[i] == jit->prg;
+
+ /* Passes in between must not increase code size */
+ return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of the i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+ if (!bpf_is_new_addr_sane(jit, i))
+ return -1;
+ jit->addrs[i] = jit->prg;
+ return 0;
+}
+
+/*
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
bool extra_pass)
{
- int i, insn_count;
+ int i, insn_count, lit32_size, lit64_size;
- jit->lit = jit->lit_start;
+ jit->lit32 = jit->lit32_start;
+ jit->lit64 = jit->lit64_start;
jit->prg = 0;
bpf_jit_prologue(jit, fp->aux->stack_depth);
+ if (bpf_set_addr(jit, 0) < 0)
+ return -1;
for (i = 0; i < fp->len; i += insn_count) {
insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
if (insn_count < 0)
return -1;
/* Next instruction address */
- jit->addrs[i + insn_count] = jit->prg;
+ if (bpf_set_addr(jit, i + insn_count) < 0)
+ return -1;
}
bpf_jit_epilogue(jit, fp->aux->stack_depth);
- jit->lit_start = jit->prg;
- jit->size = jit->lit;
+ lit32_size = jit->lit32 - jit->lit32_start;
+ lit64_size = jit->lit64 - jit->lit64_start;
+ jit->lit32_start = jit->prg;
+ if (lit32_size)
+ jit->lit32_start = ALIGN(jit->lit32_start, 4);
+ jit->lit64_start = jit->lit32_start + lit32_size;
+ if (lit64_size)
+ jit->lit64_start = ALIGN(jit->lit64_start, 8);
+ jit->size = jit->lit64_start + lit64_size;
jit->size_prg = jit->prg;
return 0;
}
@@ -1369,7 +1610,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
}
memset(&jit, 0, sizeof(jit));
- jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+ jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
if (jit.addrs == NULL) {
fp = orig_fp;
goto out;
@@ -1388,12 +1629,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/*
* Final pass: Allocate and generate program
*/
- if (jit.size >= BPF_SIZE_MAX) {
- fp = orig_fp;
- goto free_addrs;
- }
-
- header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+ header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
if (!header) {
fp = orig_fp;
goto free_addrs;
@@ -1422,7 +1658,7 @@ skip_init_ctx:
if (!fp->is_func || extra_pass) {
bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
free_addrs:
- kfree(jit.addrs);
+ kvfree(jit.addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
}
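The s390 changes above split the single literal pool into separate 32-bit and 64-bit pools placed behind the program text and addressed PC-relatively via larl/lgrl, with brcl fallbacks for branches that no longer fit a short relative offset; together these remove the old BPF_SIZE_MAX (64 KiB) limit. A standalone sketch (plain C, not kernel code) of the image-size computation now done at the end of bpf_jit_prog():

#include <stdint.h>

#define ALIGN_UP(x, a)	(((x) + ((a) - 1)) & ~((uint32_t)((a) - 1)))

/* layout: [ text | pad(4) | 32-bit literals | pad(8) | 64-bit literals ] */
static uint32_t jit_image_size(uint32_t prg_size, uint32_t lit32_size,
			       uint32_t lit64_size)
{
	uint32_t lit32_start = prg_size;
	uint32_t lit64_start;

	if (lit32_size)
		lit32_start = ALIGN_UP(lit32_start, 4);
	lit64_start = lit32_start + lit32_size;
	if (lit64_size)
		lit64_start = ALIGN_UP(lit64_start, 8);
	return lit64_start + lit64_size;
}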
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 5e8319bb207a..23c626a742e8 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define POKE_MAX_OPCODE_SIZE 5
struct text_poke_loc {
- void *detour;
void *addr;
- size_t len;
- const char opcode[POKE_MAX_OPCODE_SIZE];
+ int len;
+ s32 rel32;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
};
extern void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
+#define INT3_INSN_SIZE 1
+#define INT3_INSN_OPCODE 0xCC
+
+#define CALL_INSN_SIZE 5
+#define CALL_INSN_OPCODE 0xE8
+
+#define JMP32_INSN_SIZE 5
+#define JMP32_INSN_OPCODE 0xE9
+
+#define JMP8_INSN_SIZE 2
+#define JMP8_INSN_OPCODE 0xEB
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
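The opcode and size constants introduced above let callers build the instructions that poke_int3_handler() knows how to emulate. A minimal standalone sketch (userspace-style C, constants restated for illustration) of the rel32 encoding they imply:

#include <stdint.h>
#include <string.h>

#define JMP32_INSN_OPCODE 0xE9
#define JMP32_INSN_SIZE   5

/* Encode "jmp target" at `site`; rel32 is relative to the next instruction. */
static void encode_jmp32(uint8_t buf[JMP32_INSN_SIZE], uintptr_t site,
			 uintptr_t target)
{
	int32_t rel = (int32_t)(target - (site + JMP32_INSN_SIZE));

	buf[0] = JMP32_INSN_OPCODE;
	memcpy(&buf[1], &rel, sizeof(rel));
}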
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9d3a971ea364..9ec463fe96f2 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
{
struct text_poke_loc *tp;
- unsigned char int3 = 0xcc;
void *ip;
/*
* Having observed our INT3 instruction, we now must observe
* bp_patching.nr_entries.
*
- * nr_entries != 0 INT3
- * WMB RMB
- * write INT3 if (nr_entries)
+ * nr_entries != 0 INT3
+ * WMB RMB
+ * write INT3 if (nr_entries)
*
* Idem for other elements in bp_patching.
*/
@@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs)
return 0;
/*
- * Discount the sizeof(int3). See text_poke_bp_batch().
+ * Discount the INT3. See text_poke_bp_batch().
*/
- ip = (void *) regs->ip - sizeof(int3);
+ ip = (void *) regs->ip - INT3_INSN_SIZE;
/*
* Skip the binary search if there is a single member in the vector.
@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs)
return 0;
}
- /* set up the specified breakpoint detour */
- regs->ip = (unsigned long) tp->detour;
+ ip += tp->len;
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ /*
+ * Someone poked an explicit INT3, they'll want to handle it,
+ * do not consume.
+ */
+ return 0;
+
+ case CALL_INSN_OPCODE:
+ int3_emulate_call(regs, (long)ip + tp->rel32);
+ break;
+
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ int3_emulate_jmp(regs, (long)ip + tp->rel32);
+ break;
+
+ default:
+ BUG();
+ }
return 1;
}
@@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* synchronization using int3 breakpoint.
*
* The way it is done:
- * - For each entry in the vector:
+ * - For each entry in the vector:
* - add a int3 trap to the address that will be patched
* - sync cores
* - For each entry in the vector:
@@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler);
*/
void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
- int patched_all_but_first = 0;
- unsigned char int3 = 0xcc;
+ unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
+ int do_sync;
lockdep_assert_held(&text_mutex);
@@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
/*
* Second step: update all but the first byte of the patched range.
*/
- for (i = 0; i < nr_entries; i++) {
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
if (tp[i].len - sizeof(int3) > 0) {
text_poke((char *)tp[i].addr + sizeof(int3),
- (const char *)tp[i].opcode + sizeof(int3),
+ (const char *)tp[i].text + sizeof(int3),
tp[i].len - sizeof(int3));
- patched_all_but_first++;
+ do_sync++;
}
}
- if (patched_all_but_first) {
+ if (do_sync) {
/*
* According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But
@@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* Third step: replace the first byte (int3) by the first byte of
* replacing opcode.
*/
- for (i = 0; i < nr_entries; i++)
- text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
+ if (tp[i].text[0] == INT3_INSN_OPCODE)
+ continue;
+
+ text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+ do_sync++;
+ }
+
+ if (do_sync)
+ on_each_cpu(do_sync_core, NULL, 1);
- on_each_cpu(do_sync_core, NULL, 1);
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
bp_patching.nr_entries = 0;
}
+void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate)
+{
+ struct insn insn;
+
+ if (!opcode)
+ opcode = (void *)tp->text;
+ else
+ memcpy((void *)tp->text, opcode, len);
+
+ if (!emulate)
+ emulate = opcode;
+
+ kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+ insn_get_length(&insn);
+
+ BUG_ON(!insn_complete(&insn));
+ BUG_ON(len != insn.length);
+
+ tp->addr = addr;
+ tp->len = len;
+ tp->opcode = insn.opcode.bytes[0];
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ break;
+
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ tp->rel32 = insn.immediate.value;
+ break;
+
+ default: /* assume NOP */
+ switch (len) {
+ case 2: /* NOP2 -- emulate as JMP8+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[len], len));
+ tp->opcode = JMP8_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ case 5: /* NOP5 -- emulate as JMP32+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+ tp->opcode = JMP32_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ default: /* unknown instruction */
+ BUG();
+ }
+ break;
+ }
+}
+
/**
* text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch
@@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
- struct text_poke_loc tp = {
- .detour = handler,
- .addr = addr,
- .len = len,
- };
-
- if (len > POKE_MAX_OPCODE_SIZE) {
- WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
- return;
- }
-
- memcpy((void *)tp.opcode, opcode, len);
+ struct text_poke_loc tp;
+ text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}
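With the rewrite above, passing NULL as @emulate makes text_poke_loc_init() emulate the very instruction being written, so callers no longer compute a detour address. A hedged usage sketch (kernel context; patch_site and new_target are illustrative names, not part of this patch):

	u8 insn[JMP32_INSN_SIZE] = { JMP32_INSN_OPCODE, };

	/* rel32 is relative to the instruction following the patch site */
	*(s32 *)&insn[1] = (s32)((long)new_target -
				 ((long)patch_site + JMP32_INSN_SIZE));
	text_poke_bp(patch_site, insn, JMP32_INSN_SIZE, NULL);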
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 044053235302..c1a8b9e71408 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
return;
}
- text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
- (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
}
void arch_jump_label_transform(struct jump_entry *entry,
@@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
}
__jump_label_set_jump_code(entry, type,
- (union jump_code_union *) &tp->opcode, 0);
+ (union jump_code_union *)&tp->text, 0);
- tp->addr = entry_code;
- tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
- tp->len = JUMP_LABEL_NOP_SIZE;
+ text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
tp_vec_nr++;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index b348dd506d58..8900329c28a7 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
insn_buff[0] = RELATIVEJUMP_OPCODE;
*(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
list_del_init(&op->list);
}
@@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_head *oplist)
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
u8 insn_buff[RELATIVEJUMP_SIZE];
+ u8 emulate_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
insn_buff[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ emulate_buff[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ emulate_buff);
}
/*
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc9b341..bbc68a54795e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644
index 000000000000..f5b85bdc0535
--- /dev/null
+++ b/arch/x86/mm/maccess.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ /*
+ * Range covering the highest possible canonical userspace address
+ * as well as non-canonical address range. For the canonical range
+ * we also need to include the userspace guard page.
+ */
+ return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+ canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+ if (unlikely(invalid_probe_range((unsigned long)src)))
+ return -EFAULT;
+
+ return __probe_kernel_read(dst, src, size);
+}
+
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+ if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+ return -EFAULT;
+
+ return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
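The sign-extension in canonical_address() above replicates bit (vaddr_bits - 1) through the upper bits, so a canonical address survives the round trip while a non-canonical one changes and gets rejected. A standalone sketch with 48 valid bits:

#include <assert.h>
#include <stdint.h>

static uint64_t canonical(uint64_t vaddr, unsigned int bits)
{
	return (uint64_t)(((int64_t)vaddr << (64 - bits)) >> (64 - bits));
}

int main(void)
{
	/* canonical kernel address is unchanged ... */
	assert(canonical(0xffff888000000000ull, 48) == 0xffff888000000000ull);
	/* ... non-canonical bits 48-63 get rewritten, so the probe is rejected */
	assert(canonical(0x0000888000000000ull, 48) != 0x0000888000000000ull);
	return 0;
}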
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 991549a1c5f3..b8be18427277 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,9 +9,11 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
-
+#include <linux/memory.h>
+#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/text-patching.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -96,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
+#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -104,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * Also x86-64 register R9 is unused. x86-64 register R10 is
- * used for blinding (if enabled).
+ * x86-64 register R9 is not used by BPF programs, but can be used by BPF
+ * trampoline. x86-64 register R10 is used for blinding (if enabled).
*/
static const int reg2hex[] = {
[BPF_REG_0] = 0, /* RAX */
@@ -121,6 +124,20 @@ static const int reg2hex[] = {
[BPF_REG_FP] = 5, /* RBP readonly */
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
+ [X86_REG_R9] = 1, /* R9 register, 6th function argument */
+};
+
+static const int reg2pt_regs[] = {
+ [BPF_REG_0] = offsetof(struct pt_regs, ax),
+ [BPF_REG_1] = offsetof(struct pt_regs, di),
+ [BPF_REG_2] = offsetof(struct pt_regs, si),
+ [BPF_REG_3] = offsetof(struct pt_regs, dx),
+ [BPF_REG_4] = offsetof(struct pt_regs, cx),
+ [BPF_REG_5] = offsetof(struct pt_regs, r8),
+ [BPF_REG_6] = offsetof(struct pt_regs, bx),
+ [BPF_REG_7] = offsetof(struct pt_regs, r13),
+ [BPF_REG_8] = offsetof(struct pt_regs, r14),
+ [BPF_REG_9] = offsetof(struct pt_regs, r15),
};
/*
@@ -135,6 +152,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_7) |
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
+ BIT(X86_REG_R9) |
BIT(BPF_REG_AX));
}
@@ -186,7 +204,10 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-#define PROLOGUE_SIZE 20
+/* Number of bytes emit_patch() needs to generate instructions */
+#define X86_PATCH_SIZE 5
+
+#define PROLOGUE_SIZE 25
/*
* Emit x86-64 prologue code for BPF program and check its size.
@@ -195,8 +216,13 @@ struct jit_context {
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
- int cnt = 0;
+ int cnt = X86_PATCH_SIZE;
+ /* BPF trampoline can be made to work without these nops,
+ * but let's waste 5 bytes for now and optimize later
+ */
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+ prog += cnt;
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
@@ -213,6 +239,89 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*pprog = prog;
}
+static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + X86_PATCH_SIZE);
+ if (!is_simm32(offset)) {
+ pr_err("Target call %p is out of range\n", func);
+ return -ERANGE;
+ }
+ EMIT1_off32(opcode, offset);
+ *pprog = prog;
+ return 0;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE8);
+}
+
+static int emit_jump(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE9);
+}
+
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr,
+ const bool text_live)
+{
+ const u8 *nop_insn = ideal_nops[NOP_ATOMIC5];
+ u8 old_insn[X86_PATCH_SIZE];
+ u8 new_insn[X86_PATCH_SIZE];
+ u8 *prog;
+ int ret;
+
+ memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
+ if (old_addr) {
+ prog = old_insn;
+ ret = t == BPF_MOD_CALL ?
+ emit_call(&prog, old_addr, ip) :
+ emit_jump(&prog, old_addr, ip);
+ if (ret)
+ return ret;
+ }
+
+ memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
+ if (new_addr) {
+ prog = new_insn;
+ ret = t == BPF_MOD_CALL ?
+ emit_call(&prog, new_addr, ip) :
+ emit_jump(&prog, new_addr, ip);
+ if (ret)
+ return ret;
+ }
+
+ ret = -EBUSY;
+ mutex_lock(&text_mutex);
+ if (memcmp(ip, old_insn, X86_PATCH_SIZE))
+ goto out;
+ if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
+ if (text_live)
+ text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+ else
+ memcpy(ip, new_insn, X86_PATCH_SIZE);
+ }
+ ret = 0;
+out:
+ mutex_unlock(&text_mutex);
+ return ret;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ if (!is_kernel_text((long)ip) &&
+ !is_bpf_text_address((long)ip))
+ /* BPF poking in modules is not supported */
+ return -EINVAL;
+
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+}
+
/*
* Generate the following code:
*
@@ -227,7 +336,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog)
{
u8 *prog = *pprog;
int label1, label2, label3;
@@ -294,6 +403,68 @@ static void emit_bpf_tail_call(u8 **pprog)
*pprog = prog;
}
+static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+ u8 **pprog, int addr, u8 *image)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+ EMIT2(X86_JA, 14); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp - 548], eax */
+
+ poke->ip = image + (addr - X86_PATCH_SIZE);
+ poke->adj_off = PROLOGUE_SIZE;
+
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+ prog += X86_PATCH_SIZE;
+ /* out: */
+
+ *pprog = prog;
+}
+
+static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
+{
+ struct bpf_jit_poke_descriptor *poke;
+ struct bpf_array *array;
+ struct bpf_prog *target;
+ int i, ret;
+
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ poke = &prog->aux->poke_tab[i];
+ WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+
+ if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
+ continue;
+
+ array = container_of(poke->tail_call.map, struct bpf_array, map);
+ mutex_lock(&array->aux->poke_mutex);
+ target = array->ptrs[poke->tail_call.key];
+ if (target) {
+ /* Plain memcpy is used when image is not live yet
+ * and still not locked as read-only. Once poke
+ * location is active (poke->ip_stable), any parallel
+ * bpf_arch_text_poke() might occur still on the
+ * read-write image until we finally locked it as
+ * read-only. Both modifications on the given image
+ * are under text_mutex to avoid interference.
+ */
+ ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
+ (u8 *)target->bpf_func +
+ poke->adj_off, false);
+ BUG_ON(ret < 0);
+ }
+ WRITE_ONCE(poke->ip_stable, true);
+ mutex_unlock(&array->aux->poke_mutex);
+ }
+}
+
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u32 dst_reg, const u32 imm32)
{
@@ -377,6 +548,96 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+/* LDX: dst_reg = *(u8*)(src_reg + off) */
+static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'movzx rax, byte ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* Emit 'movzx rax, word ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* Emit 'mov eax, dword ptr [rax+0x14]' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+ else
+ EMIT1(0x8B);
+ break;
+ case BPF_DW:
+ /* Emit 'mov rax, qword ptr [rax+0x14]' */
+ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+ break;
+ }
+ /*
+ * If insn->off == 0 we can save one extra byte, but
+ * special case of x86 R13 which always needs an offset
+ * is not worth the hassle
+ */
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+ *pprog = prog;
+}
+
+/* STX: *(u8*)(dst_reg + off) = src_reg */
+static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'mov byte ptr [rax + off], al' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+ /* We have to add extra byte for x86 SIL, DIL regs */
+ src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+ else
+ EMIT1(0x88);
+ break;
+ case BPF_H:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT2(0x66, 0x89);
+ break;
+ case BPF_W:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT1(0x89);
+ break;
+ case BPF_DW:
+ EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+ break;
+ }
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+ *pprog = prog;
+}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+ u32 reg = x->fixup >> 8;
+
+ /* jump over faulting load and clear dest register */
+ *(unsigned long *)((void *)regs + reg) = 0;
+ regs->ip += x->fixup & 0xff;
+ return true;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -384,7 +645,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i, cnt = 0;
+ int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
@@ -747,64 +1008,64 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
- /* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
- else
- EMIT1(0x88);
- goto stx;
case BPF_STX | BPF_MEM | BPF_H:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT2(0x66, 0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_W:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT1(0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_DW:
- EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
-stx: if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
- insn->off);
+ emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- /* Emit 'movzx rax, byte ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_H:
- /* Emit 'movzx rax, word ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_W:
- /* Emit 'mov eax, dword ptr [rax+0x14]' */
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
- else
- EMIT1(0x8B);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW:
- /* Emit 'mov rax, qword ptr [rax+0x14]' */
- EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx: /*
- * If insn->off == 0 we can save one extra byte, but
- * special case of x86 R13 which always needs an offset
- * is not worth the hassle
- */
- if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
- insn->off);
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen;
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ if (excnt >= bpf_prog->aux->num_exentries) {
+ pr_err("ex gen bug\n");
+ return -EFAULT;
+ }
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ if (!is_simm32(delta)) {
+ pr_err("extable->insn doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->insn = delta;
+
+ delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ if (!is_simm32(delta)) {
+ pr_err("extable->handler doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->handler = delta;
+
+ if (dst_reg > BPF_REG_9) {
+ pr_err("verifier error\n");
+ return -EFAULT;
+ }
+ /*
+ * Compute the size of the x86 insn and its destination x86 register.
+ * ex_handler_bpf() will use the lower 8 bits to adjust
+ * pt_regs->ip to jump over this x86 instruction
+ * and the upper bits to figure out which pt_regs field to zero out.
+ * End result: the 4-byte x86 insn "mov rbx, qword ptr [rax+0x14]"
+ * will be skipped and rbx will be zero-initialized.
+ */
+ ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
+ }
break;
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
@@ -827,17 +1088,16 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- jmp_offset = func - (image + addrs[i]);
- if (!imm32 || !is_simm32(jmp_offset)) {
- pr_err("unsupported BPF func %d addr %p image %p\n",
- imm32, func, image);
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
return -EINVAL;
- }
- EMIT1_off32(0xE8, jmp_offset);
break;
case BPF_JMP | BPF_TAIL_CALL:
- emit_bpf_tail_call(&prog);
+ if (imm32)
+ emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+ &prog, addrs[i], image);
+ else
+ emit_bpf_tail_call_indirect(&prog);
break;
/* cond jump */
@@ -909,6 +1169,16 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
+ /* test dst_reg, dst_reg to save one extra byte */
+ if (imm32 == 0) {
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+ else if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+ goto emit_cond_jmp;
+ }
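The saving is easy to verify: for dst_reg mapped to rax, the fallback "cmp rax, 0" below encodes as 48 83 F8 00 (4 bytes), while the "test rax, rax" emitted above encodes as 48 85 C0 (3 bytes), one byte shorter with the same effect on the flags for a comparison against zero.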
+
/* cmp dst_reg, imm8/32 */
if (BPF_CLASS(insn->code) == BPF_JMP)
EMIT1(add_1mod(0x48, dst_reg));
@@ -1048,9 +1318,218 @@ emit_jmp:
addrs[i] = proglen;
prog = temp;
}
+
+ if (image && excnt != bpf_prog->aux->num_exentries) {
+ pr_err("extable is not populated\n");
+ return -EFAULT;
+ }
return proglen;
}
+static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+ /* Store function arguments to the stack.
+ * For a function that accepts two pointers the sequence will be:
+ * mov QWORD PTR [rbp-0x10],rdi
+ * mov QWORD PTR [rbp-0x8],rsi
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ BPF_REG_FP,
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ -(stack_size - i * 8));
+}
+
+static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+
+ /* Restore function arguments from the stack.
+ * For a function that accepts two pointers the sequence will be:
+ * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+ * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ BPF_REG_FP,
+ -(stack_size - i * 8));
+}
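Worked through for the two-pointer example in the comments above: nr_args = 2 gives stack_size = 16, so the loop computes offsets -(16 - 0*8) = -16 for the first argument and -(16 - 1*8) = -8 for the second, matching the [rbp-0x10] and [rbp-0x8] slots in the mov sequences shown.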
+
+static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
+ struct bpf_prog **progs, int prog_cnt, int stack_size)
+{
+ u8 *prog = *pprog;
+ int cnt = 0, i;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+ /* arg1: lea rdi, [rbp - stack_size] */
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ /* arg2: progs[i]->insnsi for interpreter */
+ if (!progs[i]->jited)
+ emit_mov_imm64(&prog, BPF_REG_2,
+ (long) progs[i]->insnsi >> 32,
+ (u32) (long) progs[i]->insnsi);
+ /* call JITed bpf program or interpreter */
+ if (emit_call(&prog, progs[i]->bpf_func, prog))
+ return -EINVAL;
+
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
+ (u32) (long) progs[i]);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
+ *pprog = prog;
+ return 0;
+}
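A conceptual sketch of what the enter/exit pair around each program is expected to do, per the trampoline comments below (rcu/preempt bracketing plus optional runtime stats). This is illustrative, not the kernel's implementation; bpf_stats_enabled and account_runtime are hypothetical names:

	/* Illustrative only: bracket every fentry/fexit program. */
	static u64 example_prog_enter(void)
	{
		rcu_read_lock();
		preempt_disable();
		/* a start time is only needed when stats are collected */
		return bpf_stats_enabled ? sched_clock() : 0;	/* hypothetical flag */
	}

	static void example_prog_exit(struct bpf_prog *prog, u64 start)
	{
		if (start)
			account_runtime(prog, sched_clock() - start);	/* hypothetical */
		preempt_enable();
		rcu_read_unlock();
	}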
+
+/* Example:
+ * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+ * its 'struct btf_func_model' will be nr_args=2
+ * The assembly code when eth_type_trans is executing after trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 16 // space for skb and dev
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 16], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 8], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 16] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack
+ * pop rbx
+ * leave
+ * ret
+ *
+ * eth_type_trans has a 5-byte nop at the beginning. These 5 bytes will be
+ * replaced with 'call generated_bpf_trampoline'. When the trampoline returns,
+ * eth_type_trans will continue executing with the original skb and dev pointers.
+ *
+ * The assembly code when eth_type_trans is called from trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 24 // space for skb, dev, return value
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 24], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 16], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack
+ * call eth_type_trans+5 // execute body of eth_type_trans
+ * mov qword ptr [rbp - 8], rax // save return value
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value
+ * pop rbx
+ * leave
+ * add rsp, 8 // skip eth_type_trans's frame
+ * ret // return to its caller
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ int cnt = 0, nr_args = m->nr_args;
+ int stack_size = nr_args * 8;
+ u8 *prog;
+
+ /* x86-64 passes up to 6 arguments in registers; support for more can be added in the future */
+ if (nr_args > 6)
+ return -ENOTSUPP;
+
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+ (flags & BPF_TRAMP_F_SKIP_FRAME))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ stack_size += 8; /* room for return value of orig_call */
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip the patched call instruction and point orig_call to the
+ * actual body of the kernel function.
+ */
+ orig_call += X86_PATCH_SIZE;
+
+ prog = image;
+
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
+ EMIT1(0x53); /* push rbx */
+
+ save_regs(m, &prog, nr_args, stack_size);
+
+ if (fentry_cnt)
+ if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ if (fentry_cnt)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ /* call original function */
+ if (emit_call(&prog, orig_call, prog))
+ return -EINVAL;
+ /* remember return value on the stack for the bpf prog to access */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+ }
+
+ if (fexit_cnt)
+ if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ /* restore original return value back into RAX */
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+
+ EMIT1(0x5B); /* pop rbx */
+ EMIT1(0xC9); /* leave */
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip our return address and return to parent */
+ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+ EMIT1(0xC3); /* ret */
+ /* One half of the page holds the currently running trampoline.
+ * The other half is an area for the next trampoline.
+ * Make sure the trampoline generation logic doesn't overflow.
+ */
+ if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+ return -EFAULT;
+ return 0;
+}
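A hedged usage sketch; the image page allocator and the prog/model variables are placeholders for machinery elsewhere in the series, not part of this patch:

	/* Sketch: attach one jited fentry program. With no fexit progs the
	 * caller asks the trampoline to restore regs and return directly.
	 */
	struct bpf_prog *fentry[] = { my_fentry_prog };	/* placeholder */
	void *image = alloc_trampoline_page();		/* placeholder */
	int err;

	err = arch_prepare_bpf_trampoline(image, model, BPF_TRAMP_F_RESTORE_REGS,
					  fentry, 1, NULL, 0,
					  (void *)eth_type_trans);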
+
struct x64_jit_data {
struct bpf_binary_header *header;
int *addrs;
@@ -1148,12 +1627,24 @@ out_image:
break;
}
if (proglen == oldproglen) {
- header = bpf_jit_binary_alloc(proglen, &image,
- 1, jit_fill_hole);
+ /*
+ * The number of entries in extable is the number of BPF_LDX
+ * insns that access kernel memory via "pointer to BTF type".
+ * The verifier changed their opcode from LDX|MEM|size
+ * to LDX|PROBE_MEM|size to make JITing easier.
+ */
+ u32 align = __alignof__(struct exception_table_entry);
+ u32 extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+
+ /* allocate module memory for x86 insns and extable */
+ header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
+ &image, align, jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
}
+ prog->aux->extable = (void *) image + roundup(proglen, align);
}
oldproglen = proglen;
cond_resched();
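The resulting module-memory layout, sketched:

	image .. image+proglen-1           : JITed x86 insns
	image+proglen .. (alignment pad)   : jit_fill_hole() filler
	image+roundup(proglen, align) ..   : extable, num_exentries entries
	                                     of struct exception_table_entry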
@@ -1164,6 +1655,7 @@ out_image:
if (image) {
if (!prog->is_func || extra_pass) {
+ bpf_tail_call_direct_fixup(prog);
bpf_jit_binary_lock_ro(header);
} else {
jit_data->addrs = addrs;