aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/maccess.c43
-rw-r--r--arch/x86/net/bpf_jit_comp.c107
3 files changed, 147 insertions, 5 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc9b341..bbc68a54795e 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644
index 000000000000..f5b85bdc0535
--- /dev/null
+++ b/arch/x86/mm/maccess.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ /*
+ * Range covering the highest possible canonical userspace address
+ * as well as non-canonical address range. For the canonical range
+ * we also need to include the userspace guard page.
+ */
+ return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+ canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+ if (unlikely(invalid_probe_range((unsigned long)src)))
+ return -EFAULT;
+
+ return __probe_kernel_read(dst, src, size);
+}
+
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+ if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+ return -EFAULT;
+
+ return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 991549a1c5f3..8cd23d8309bf 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,7 +9,7 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
-
+#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
@@ -123,6 +123,19 @@ static const int reg2hex[] = {
[AUX_REG] = 3, /* R11 temp register */
};
+static const int reg2pt_regs[] = {
+ [BPF_REG_0] = offsetof(struct pt_regs, ax),
+ [BPF_REG_1] = offsetof(struct pt_regs, di),
+ [BPF_REG_2] = offsetof(struct pt_regs, si),
+ [BPF_REG_3] = offsetof(struct pt_regs, dx),
+ [BPF_REG_4] = offsetof(struct pt_regs, cx),
+ [BPF_REG_5] = offsetof(struct pt_regs, r8),
+ [BPF_REG_6] = offsetof(struct pt_regs, bx),
+ [BPF_REG_7] = offsetof(struct pt_regs, r13),
+ [BPF_REG_8] = offsetof(struct pt_regs, r14),
+ [BPF_REG_9] = offsetof(struct pt_regs, r15),
+};
+
/*
* is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15
* which need extra byte of encoding.
@@ -377,6 +390,19 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+ u32 reg = x->fixup >> 8;
+
+ /* jump over faulting load and clear dest register */
+ *(unsigned long *)((void *)regs + reg) = 0;
+ regs->ip += x->fixup & 0xff;
+ return true;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -384,7 +410,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i, cnt = 0;
+ int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
@@ -778,14 +804,17 @@ stx: if (is_imm8(insn->off))
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* Emit 'movzx rax, byte ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* Emit 'movzx rax, word ptr [rax + off]' */
EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* Emit 'mov eax, dword ptr [rax+0x14]' */
if (is_ereg(dst_reg) || is_ereg(src_reg))
EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
@@ -793,6 +822,7 @@ stx: if (is_imm8(insn->off))
EMIT1(0x8B);
goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/* Emit 'mov rax, qword ptr [rax+0x14]' */
EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx: /*
@@ -805,6 +835,48 @@ ldx: /*
else
EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
insn->off);
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen;
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ if (excnt >= bpf_prog->aux->num_exentries) {
+ pr_err("ex gen bug\n");
+ return -EFAULT;
+ }
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ if (!is_simm32(delta)) {
+ pr_err("extable->insn doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->insn = delta;
+
+ delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ if (!is_simm32(delta)) {
+ pr_err("extable->handler doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->handler = delta;
+
+ if (dst_reg > BPF_REG_9) {
+ pr_err("verifier error\n");
+ return -EFAULT;
+ }
+ /*
+ * Compute size of x86 insn and its target dest x86 register.
+ * ex_handler_bpf() will use lower 8 bits to adjust
+ * pt_regs->ip to jump over this x86 instruction
+ * and upper bits to figure out which pt_regs to zero out.
+ * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
+ * of 4 bytes will be ignored and rbx will be zero inited.
+ */
+ ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
+ }
break;
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
@@ -909,6 +981,16 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
+ /* test dst_reg, dst_reg to save one extra byte */
+ if (imm32 == 0) {
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+ else if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+ goto emit_cond_jmp;
+ }
+
/* cmp dst_reg, imm8/32 */
if (BPF_CLASS(insn->code) == BPF_JMP)
EMIT1(add_1mod(0x48, dst_reg));
@@ -1048,6 +1130,11 @@ emit_jmp:
addrs[i] = proglen;
prog = temp;
}
+
+ if (image && excnt != bpf_prog->aux->num_exentries) {
+ pr_err("extable is not populated\n");
+ return -EFAULT;
+ }
return proglen;
}
@@ -1148,12 +1235,24 @@ out_image:
break;
}
if (proglen == oldproglen) {
- header = bpf_jit_binary_alloc(proglen, &image,
- 1, jit_fill_hole);
+ /*
+ * The number of entries in extable is the number of BPF_LDX
+ * insns that access kernel memory via "pointer to BTF type".
+ * The verifier changed their opcode from LDX|MEM|size
+ * to LDX|PROBE_MEM|size to make JITing easier.
+ */
+ u32 align = __alignof__(struct exception_table_entry);
+ u32 extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+
+ /* allocate module memory for x86 insns and extable */
+ header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
+ &image, align, jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
}
+ prog->aux->extable = (void *) image + roundup(proglen, align);
}
oldproglen = proglen;
cond_resched();