diff options
Diffstat (limited to 'arch/powerpc/net')
-rw-r--r-- | arch/powerpc/net/bpf_jit.h | 57 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit64.h | 91 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp.c | 129 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp32.c | 309 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp64.c | 593 |
5 files changed, 724 insertions, 455 deletions
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 7e9b978b768e..a4f7880f959d 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -13,7 +13,7 @@ #include <asm/types.h> #include <asm/ppc-opcode.h> -#ifdef PPC64_ELF_ABI_v1 +#ifdef CONFIG_PPC64_ELF_ABI_V1 #define FUNCTION_DESCR_SIZE 24 #else #define FUNCTION_DESCR_SIZE 0 @@ -27,21 +27,21 @@ #define PPC_JMP(dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ - EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + EMIT(PPC_RAW_BRANCH(offset)); \ } while (0) -/* blr; (unconditional 'branch' with link) to absolute address */ -#define PPC_BL_ABS(dest) EMIT(PPC_INST_BL | \ - (((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc)) +/* bl (unconditional 'branch' with link) */ +#define PPC_BL(dest) EMIT(PPC_RAW_BL((dest) - (unsigned long)(image + ctx->idx))) + /* "cond" here covers BO:BI fields. */ #define PPC_BCC_SHORT(cond, dest) \ do { \ long offset = (long)(dest) - (ctx->idx * 4); \ - if (!is_offset_in_cond_branch_range(offset)) { \ + if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ return -ERANGE; \ } \ @@ -59,10 +59,7 @@ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ } } while(0) -#ifdef CONFIG_PPC32 -#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) -#endif - +#ifdef CONFIG_PPC64 #define PPC_LI64(d, i) do { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ @@ -85,11 +82,6 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ } } while (0) - -#ifdef CONFIG_PPC64 -#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0) -#else -#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif /* @@ -125,17 +117,7 @@ #define COND_LE (CR0_GT | COND_CMP_FALSE) #define SEEN_FUNC 0x20000000 /* might call external helpers */ -#define SEEN_STACK 0x40000000 /* uses BPF stack */ -#define SEEN_TAILCALL 0x80000000 /* uses tail calls */ - -#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ -#define SEEN_NVREG_MASK 0x0003ffff /* Non volatile registers r14-r31 */ - -#ifdef CONFIG_PPC64 -extern const int b2p[MAX_BPF_JIT_REG + 2]; -#else -extern const int b2p[MAX_BPF_JIT_REG + 1]; -#endif +#define SEEN_TAILCALL 0x40000000 /* uses tail calls */ struct codegen_context { /* @@ -150,9 +132,19 @@ struct codegen_context { unsigned int seen; unsigned int idx; unsigned int stack_size; - int b2p[ARRAY_SIZE(b2p)]; + int b2p[MAX_BPF_JIT_REG + 2]; + unsigned int exentry_idx; + unsigned int alt_exit_addr; }; +#define bpf_to_ppc(r) (ctx->b2p[r]) + +#ifdef CONFIG_PPC32 +#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */ +#else +#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */ +#endif + static inline void bpf_flush_icache(void *start, void *end) { smp_wmb(); /* smp write barrier */ @@ -174,12 +166,17 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i) ctx->seen &= ~(1 << (31 - i)); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); +void bpf_jit_init_reg_mapping(struct codegen_context *ctx); +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func); int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass); + u32 *addrs, int pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); + +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, + int insn_idx, int jmp_off, int dst_reg); #endif diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h deleted file mode 100644 index b63b35e45e55..000000000000 --- a/arch/powerpc/net/bpf_jit64.h +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * bpf_jit64.h: BPF JIT compiler for PPC64 - * - * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> - * IBM Corporation - */ -#ifndef _BPF_JIT64_H -#define _BPF_JIT64_H - -#include "bpf_jit.h" - -/* - * Stack layout: - * Ensure the top half (upto local_tmp_var) stays consistent - * with our redzone usage. - * - * [ prev sp ] <------------- - * [ nv gpr save area ] 5*8 | - * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 16 | - * fp (r31) --> [ ebpf stack space ] upto 512 | - * [ frame header ] 32/112 | - * sp (r1) ---> [ stack pointer ] -------------- - */ - -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (5*8) -/* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 24 -/* stack frame excluding BPF stack, ensure this is quadword aligned */ -#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ - BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) - -#ifndef __ASSEMBLY__ - -/* BPF register usage */ -#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) - -/* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 2] = { - /* function return value */ - [BPF_REG_0] = 8, - /* function arguments */ - [BPF_REG_1] = 3, - [BPF_REG_2] = 4, - [BPF_REG_3] = 5, - [BPF_REG_4] = 6, - [BPF_REG_5] = 7, - /* non volatile registers */ - [BPF_REG_6] = 27, - [BPF_REG_7] = 28, - [BPF_REG_8] = 29, - [BPF_REG_9] = 30, - /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 31, - /* eBPF jit internal registers */ - [BPF_REG_AX] = 2, - [TMP_REG_1] = 9, - [TMP_REG_2] = 10 -}; - -/* PPC NVR range -- update this if we ever use NVRs below r27 */ -#define BPF_PPC_NVR_MIN 27 - -/* - * WARNING: These can use TMP_REG_2 if the offset is not at word boundary, - * so ensure that it isn't in use already. - */ -#define PPC_BPF_LL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_LDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_LD(r, base, i)); \ - } while(0) -#define PPC_BPF_STL(r, base, i) do { \ - if ((i) % 4) { \ - EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\ - EMIT(PPC_RAW_STDX(r, base, \ - b2p[TMP_REG_2])); \ - } else \ - EMIT(PPC_RAW_STD(r, base, i)); \ - } while(0) -#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0) - -#endif /* !__ASSEMBLY__ */ - -#endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 90ce75f0f1e2..43e634126514 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size) memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) +/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */ +static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) { const struct bpf_insn *insn = fp->insnsi; bool func_addr_fixed; u64 func_addr; u32 tmp_idx; - int i, ret; + int i, j, ret; for (i = 0; i < fp->len; i++) { /* @@ -59,19 +59,54 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, */ tmp_idx = ctx->idx; ctx->idx = addrs[i] / 4; - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; /* * Restore ctx->idx here. This is safe as the length * of the JITed sequence remains unchanged. */ ctx->idx = tmp_idx; + } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) { + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; +#ifdef CONFIG_PPC32 + PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm); + PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm); + for (j = ctx->idx - addrs[i] / 4; j < 4; j++) + EMIT(PPC_RAW_NOP()); +#else + func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); + PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr); + /* overwrite rest with nops */ + for (j = ctx->idx - addrs[i] / 4; j < 5; j++) + EMIT(PPC_RAW_NOP()); +#endif + ctx->idx = tmp_idx; + i++; } } return 0; } +int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) +{ + if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { + PPC_JMP(exit_addr); + } else if (ctx->alt_exit_addr) { + if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4)))) + return -1; + PPC_JMP(ctx->alt_exit_addr); + } else { + ctx->alt_exit_addr = ctx->idx * 4; + bpf_jit_build_epilogue(image, ctx); + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -101,6 +136,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *tmp_fp; bool bpf_blinded = false; bool extra_pass = false; + u32 extable_len; + u32 fixup_len; if (!fp->jit_requested) return org_fp; @@ -131,7 +168,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) image = jit_data->image; bpf_hdr = jit_data->header; proglen = jit_data->proglen; - alloclen = proglen + FUNCTION_DESCR_SIZE; extra_pass = true; goto skip_init_ctx; } @@ -143,13 +179,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) } memset(&cgctx, 0, sizeof(struct codegen_context)); - memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p)); + bpf_jit_init_reg_mapping(&cgctx); /* Make sure that the stack is quadword aligned. */ cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { /* We hit something illegal or unsupported. */ fp = org_fp; goto out_addrs; @@ -159,10 +195,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * If we have seen a tail call, we need a second pass. * This is because bpf_jit_emit_common_epilogue() is called * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen. + * We also need a second pass if we ended up with too large + * a program so as to ensure BPF_EXIT branches are in range. */ - if (cgctx.seen & SEEN_TAILCALL) { + if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) { cgctx.idx = 0; - if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) { fp = org_fp; goto out_addrs; } @@ -175,10 +213,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) * calculate total size from idx. */ bpf_jit_build_prologue(0, &cgctx); + addrs[fp->len] = cgctx.idx * 4; bpf_jit_build_epilogue(0, &cgctx); + fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4; + extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry); + proglen = cgctx.idx * 4; - alloclen = proglen + FUNCTION_DESCR_SIZE; + alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len; bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); if (!bpf_hdr) { @@ -186,6 +228,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto out_addrs; } + if (extable_len) + fp->aux->extable = (void *)image + FUNCTION_DESCR_SIZE + proglen + fixup_len; + skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); @@ -193,13 +238,13 @@ skip_init_ctx: /* * Do not touch the prologue and epilogue as they will remain * unchanged. Only fix the branch target address for subprog - * calls in the body. + * calls in the body, and ldimm64 instructions. * * This does not change the offsets and lengths of the subprog * call instruction sequences and hence, the size of the JITed * image as well. */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs); /* There is no need to perform the usual passes. */ goto skip_codegen_passes; @@ -209,8 +254,9 @@ skip_init_ctx: for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; + cgctx.alt_exit_addr = 0; bpf_jit_build_prologue(code_base, &cgctx); - if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass)) { + if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) { bpf_jit_binary_free(bpf_hdr); fp = org_fp; goto out_addrs; @@ -230,7 +276,7 @@ skip_codegen_passes: */ bpf_jit_dump(flen, proglen, pass, code_base); -#ifdef PPC64_ELF_ABI_v1 +#ifdef CONFIG_PPC64_ELF_ABI_V1 /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; @@ -238,9 +284,9 @@ skip_codegen_passes: fp->bpf_func = (void *)image; fp->jited = 1; - fp->jited_len = alloclen; + fp->jited_len = proglen + FUNCTION_DESCR_SIZE; - bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + bpf_hdr->size); if (!fp->is_func || extra_pass) { bpf_jit_binary_lock_ro(bpf_hdr); bpf_prog_fill_jited_linfo(fp, addrs); @@ -262,3 +308,52 @@ out: return fp; } + +/* + * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling + * this function, as this only applies to BPF_PROBE_MEM, for now. + */ +int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx, + int insn_idx, int jmp_off, int dst_reg) +{ + off_t offset; + unsigned long pc; + struct exception_table_entry *ex; + u32 *fixup; + + /* Populate extable entries only in the last pass */ + if (pass != 2) + return 0; + + if (!fp->aux->extable || + WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries)) + return -EINVAL; + + pc = (unsigned long)&image[insn_idx]; + + fixup = (void *)fp->aux->extable - + (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) + + (ctx->exentry_idx * BPF_FIXUP_LEN * 4); + + fixup[0] = PPC_RAW_LI(dst_reg, 0); + if (IS_ENABLED(CONFIG_PPC32)) + fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */ + + fixup[BPF_FIXUP_LEN - 1] = + PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]); + + ex = &fp->aux->extable[ctx->exentry_idx]; + + offset = pc - (long)&ex->insn; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->insn = offset; + + offset = (long)fixup - (long)&ex->fixup; + if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) + return -ERANGE; + ex->fixup = offset; + + ctx->exentry_idx++; + return 0; +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 0da31d41d413..43f1c76d48ce 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -33,40 +33,38 @@ /* stack frame, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size) +#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0)) + +/* PPC NVR range -- update this if we ever use NVRs below r17 */ +#define BPF_PPC_NVR_MIN _R17 +#define BPF_PPC_TC _R16 + /* BPF register usage */ -#define TMP_REG (MAX_BPF_JIT_REG + 0) +#define TMP_REG (MAX_BPF_JIT_REG + 0) /* BPF to ppc register mappings */ -const int b2p[MAX_BPF_JIT_REG + 1] = { +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ /* function return value */ - [BPF_REG_0] = 12, + ctx->b2p[BPF_REG_0] = _R12; /* function arguments */ - [BPF_REG_1] = 4, - [BPF_REG_2] = 6, - [BPF_REG_3] = 8, - [BPF_REG_4] = 10, - [BPF_REG_5] = 22, + ctx->b2p[BPF_REG_1] = _R4; + ctx->b2p[BPF_REG_2] = _R6; + ctx->b2p[BPF_REG_3] = _R8; + ctx->b2p[BPF_REG_4] = _R10; + ctx->b2p[BPF_REG_5] = _R22; /* non volatile registers */ - [BPF_REG_6] = 24, - [BPF_REG_7] = 26, - [BPF_REG_8] = 28, - [BPF_REG_9] = 30, + ctx->b2p[BPF_REG_6] = _R24; + ctx->b2p[BPF_REG_7] = _R26; + ctx->b2p[BPF_REG_8] = _R28; + ctx->b2p[BPF_REG_9] = _R30; /* frame pointer aka BPF_REG_10 */ - [BPF_REG_FP] = 18, + ctx->b2p[BPF_REG_FP] = _R18; /* eBPF jit internal registers */ - [BPF_REG_AX] = 20, - [TMP_REG] = 31, /* 32 bits */ -}; - -static int bpf_to_ppc(struct codegen_context *ctx, int reg) -{ - return ctx->b2p[reg]; + ctx->b2p[BPF_REG_AX] = _R20; + ctx->b2p[TMP_REG] = _R31; /* 32 bits */ } -/* PPC NVR range -- update this if we ever use NVRs below r17 */ -#define BPF_PPC_NVR_MIN 17 -#define BPF_PPC_TC 16 - static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) { if ((reg >= BPF_PPC_NVR_MIN && reg < 32) || reg == BPF_PPC_TC) @@ -77,14 +75,22 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) return BPF_PPC_STACKFRAME(ctx) - 4; } +#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */ +#define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */ +#define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */ + void bpf_jit_realloc_regs(struct codegen_context *ctx) { + unsigned int nvreg_mask; + if (ctx->seen & SEEN_FUNC) - return; + nvreg_mask = SEEN_NVREG_TEMP_MASK; + else + nvreg_mask = SEEN_NVREG_FULL_MASK; - while (ctx->seen & SEEN_NVREG_MASK && + while (ctx->seen & nvreg_mask && (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { - int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab)); + int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab)); int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); int i; @@ -108,8 +114,8 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) int i; /* First arg comes in as a 32 bits pointer. */ - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_1) - 1, 0)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); /* @@ -118,7 +124,7 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_1) - 1, _R1, + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); else EMIT(PPC_RAW_NOP()); @@ -140,15 +146,15 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i))); /* If needed retrieve arguments 9 and 10, ie 5th 64 bits arg.*/ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); - EMIT(PPC_RAW_LWZ(bpf_to_ppc(ctx, BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5) - 1, _R1, BPF_PPC_STACKFRAME(ctx)) + 8); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(BPF_REG_5), _R1, BPF_PPC_STACKFRAME(ctx)) + 12); } /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_FP))) { - EMIT(PPC_RAW_LI(bpf_to_ppc(ctx, BPF_REG_FP) - 1, 0)); - EMIT(PPC_RAW_ADDI(bpf_to_ppc(ctx, BPF_REG_FP), _R1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) { + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -168,7 +174,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) { - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_0))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); bpf_jit_emit_common_epilogue(image, ctx); @@ -185,12 +191,15 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { s32 rel = (s32)func - (s32)(image + ctx->idx); if (image && rel < 0x2000000 && rel >= -0x2000000) { - PPC_BL_ABS(func); + PPC_BL(func); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); } else { /* Load function address into r0 */ EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); @@ -198,6 +207,8 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun EMIT(PPC_RAW_MTCTR(_R0)); EMIT(PPC_RAW_BCTRL()); } + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -208,8 +219,8 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r5-r6/BPF_REG_2 - pointer to bpf_array * r7-r8/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = bpf_to_ppc(ctx, BPF_REG_2); - int b2p_index = bpf_to_ppc(ctx, BPF_REG_3); + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); /* * if (index >= array->map.max_entries) @@ -218,16 +229,16 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_CMPLW(b2p_index, _R0)); EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - PPC_BCC(COND_GE, out); + PPC_BCC_SHORT(COND_GE, out); /* - * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT)); /* tail_call_cnt++; */ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1)); - PPC_BCC(COND_GT, out); + PPC_BCC_SHORT(COND_GE, out); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); @@ -240,7 +251,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * goto out; */ EMIT(PPC_RAW_CMPLWI(_R3, 0)); - PPC_BCC(COND_EQ, out); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); @@ -255,7 +266,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_MTCTR(_R3)); - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(ctx, BPF_REG_1))); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); /* tear restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -268,7 +279,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs, int pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -279,16 +290,21 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = bpf_to_ppc(ctx, insn[i].dst_reg); + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); u32 dst_reg_h = dst_reg - 1; - u32 src_reg = bpf_to_ppc(ctx, insn[i].src_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); u32 src_reg_h = src_reg - 1; - u32 tmp_reg = bpf_to_ppc(ctx, TMP_REG); + u32 ax_reg = bpf_to_ppc(BPF_REG_AX); + u32 tmp_reg = bpf_to_ppc(TMP_REG); + u32 size = BPF_SIZE(code); + u32 save_reg, ret_reg; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; u64 func_addr; u32 true_cond; + u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -784,25 +800,71 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_STX ATOMIC (atomic ops) */ case BPF_STX | BPF_ATOMIC | BPF_W: - if (imm != BPF_ADD) { - pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n", - code, i); - return -ENOTSUPP; - } - - /* *(u32 *)(dst + off) += src */ + save_reg = _R0; + ret_reg = src_reg; bpf_set_seen_register(ctx, tmp_reg); + bpf_set_seen_register(ctx, ax_reg); + /* Get offset into TMP_REG */ EMIT(PPC_RAW_LI(tmp_reg, off)); + tmp_idx = ctx->idx * 4; /* load value from memory into r0 */ EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); - /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); - /* store result back */ - EMIT(PPC_RAW_STWCX(_R0, tmp_reg, dst_reg)); + + /* Save old value in BPF_REG_AX */ + if (imm & BPF_FETCH) + EMIT(PPC_RAW_MR(ax_reg, _R0)); + + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + EMIT(PPC_RAW_ADD(_R0, _R0, src_reg)); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + EMIT(PPC_RAW_AND(_R0, _R0, src_reg)); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + EMIT(PPC_RAW_OR(_R0, _R0, src_reg)); + break; + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + EMIT(PPC_RAW_XOR(_R0, _R0, src_reg)); + break; + case BPF_CMPXCHG: + /* + * Return old value in BPF_REG_0 for BPF_CMPXCHG & + * in src_reg for other cases. + */ + ret_reg = bpf_to_ppc(BPF_REG_0); + + /* Compare with old value in BPF_REG_0 */ + EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0)); + /* Don't set if different from old value */ + PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4); + fallthrough; + case BPF_XCHG: + save_reg = src_reg; + break; + default: + pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n", + code, i); + return -EOPNOTSUPP; + } + + /* store new value */ + EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg)); /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_NE, (ctx->idx - 3) * 4); + PPC_BCC_SHORT(COND_NE, tmp_idx); + + /* For the BPF_FETCH variant, get old data into src_reg */ + if (imm & BPF_FETCH) { + EMIT(PPC_RAW_MR(ret_reg, ax_reg)); + if (!fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */ + } break; case BPF_STX | BPF_ATOMIC | BPF_DW: /* *(u64 *)(dst + off) += src */ @@ -812,23 +874,91 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * BPF_LDX */ case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (!fp->aux->verifier_zext) - EMIT(PPC_RAW_LI(dst_reg_h, 0)); - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ - EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM + * load only if addr is kernel address (see is_kernel_addr()), otherwise + * set dst_reg=0 and move on. + */ + if (BPF_MODE(code) == BPF_PROBE_MEM) { + PPC_LI32(_R0, TASK_SIZE - off); + EMIT(PPC_RAW_CMPLW(src_reg, _R0)); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4); + EMIT(PPC_RAW_LI(dst_reg, 0)); + /* + * For BPF_DW case, "li reg_h,0" would be needed when + * !fp->aux->verifier_zext. Emit NOP otherwise. + * + * Note that "li reg_h,0" is emitted for BPF_B/H/W case, + * if necessary. So, jump there insted of emitting an + * additional "li reg_h,0" instruction. + */ + if (size == BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + else + EMIT(PPC_RAW_NOP()); + /* + * Need to jump two instructions instead of one for BPF_DW case + * as there are two load instructions for dst_reg_h & dst_reg + * respectively. + */ + if (size == BPF_DW) + PPC_JMP((ctx->idx + 3) * 4); + else + PPC_JMP((ctx->idx + 2) * 4); + } + + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off)); + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4)); + break; + } + + if (size != BPF_DW && !fp->aux->verifier_zext) + EMIT(PPC_RAW_LI(dst_reg_h, 0)); + + if (BPF_MODE(code) == BPF_PROBE_MEM) { + int insn_idx = ctx->idx - 1; + int jmp_off = 4; + + /* + * In case of BPF_DW, two lwz instructions are emitted, one + * for higher 32-bit and another for lower 32-bit. So, set + * ex->insn to the first of the two and jump over both + * instructions in fixup. + * + * Similarly, with !verifier_zext, two instructions are + * emitted for BPF_B/H/W case. So, set ex->insn to the + * instruction that could fault and skip over both + * instructions. + */ + if (size == BPF_DW || !fp->aux->verifier_zext) { + insn_idx -= 1; + jmp_off += 4; + } + + ret = bpf_add_extable_entry(fp, image, pass, ctx, insn_idx, + jmp_off, dst_reg); + if (ret) + return ret; + } break; /* @@ -836,8 +966,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 16 byte instruction that uses two 'struct bpf_insn' */ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + tmp_idx = ctx->idx; PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); PPC_LI32(dst_reg, (u32)insn[i].imm); + /* padding to allow full 4 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 4; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; @@ -851,8 +985,11 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. */ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; @@ -862,20 +999,22 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + ret = bpf_jit_get_func_addr(fp, &insn[i], false, &func_addr, &func_addr_fixed); if (ret < 0) return ret; - if (bpf_is_seen_register(ctx, bpf_to_ppc(ctx, BPF_REG_5))) { - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5) - 1, _R1, 8)); - EMIT(PPC_RAW_STW(bpf_to_ppc(ctx, BPF_REG_5), _R1, 12)); + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) { + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8)); + EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12)); } - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + if (ret) + return ret; - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0) - 1, _R3)); - EMIT(PPC_RAW_MR(bpf_to_ppc(ctx, BPF_REG_0), _R4)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4)); break; /* diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 8b5157ccfeba..29ee306d6302 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -17,7 +17,60 @@ #include <linux/bpf.h> #include <asm/security_features.h> -#include "bpf_jit64.h" +#include "bpf_jit.h" + +/* + * Stack layout: + * Ensure the top half (upto local_tmp_var) stays consistent + * with our redzone usage. + * + * [ prev sp ] <------------- + * [ nv gpr save area ] 5*8 | + * [ tail_call_cnt ] 8 | + * [ local_tmp_var ] 16 | + * fp (r31) --> [ ebpf stack space ] upto 512 | + * [ frame header ] 32/112 | + * sp (r1) ---> [ stack pointer ] -------------- + */ + +/* for gpr non volatile registers BPG_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (5*8) +/* for bpf JIT code internal usage */ +#define BPF_PPC_STACK_LOCALS 24 +/* stack frame excluding BPF stack, ensure this is quadword aligned */ +#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) + +/* BPF register usage */ +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) + +/* BPF to ppc register mappings */ +void bpf_jit_init_reg_mapping(struct codegen_context *ctx) +{ + /* function return value */ + ctx->b2p[BPF_REG_0] = _R8; + /* function arguments */ + ctx->b2p[BPF_REG_1] = _R3; + ctx->b2p[BPF_REG_2] = _R4; + ctx->b2p[BPF_REG_3] = _R5; + ctx->b2p[BPF_REG_4] = _R6; + ctx->b2p[BPF_REG_5] = _R7; + /* non volatile registers */ + ctx->b2p[BPF_REG_6] = _R27; + ctx->b2p[BPF_REG_7] = _R28; + ctx->b2p[BPF_REG_8] = _R29; + ctx->b2p[BPF_REG_9] = _R30; + /* frame pointer aka BPF_REG_10 */ + ctx->b2p[BPF_REG_FP] = _R31; + /* eBPF jit internal registers */ + ctx->b2p[BPF_REG_AX] = _R12; + ctx->b2p[TMP_REG_1] = _R9; + ctx->b2p[TMP_REG_2] = _R10; +} + +/* PPC NVR range -- update this if we ever use NVRs below r27 */ +#define BPF_PPC_NVR_MIN _R27 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) { @@ -27,7 +80,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * - the bpf program uses its stack area * The latter condition is deduced from the usage of BPF_REG_FP */ - return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, b2p[BPF_REG_FP]); + return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)); } /* @@ -73,22 +126,23 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); + /* * Initialize tail_call_cnt if we do tail calls. * Otherwise, put in NOPs so that it can be skipped when we are * invoked through a tail call. */ if (ctx->seen & SEEN_TAILCALL) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0)); + EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0)); /* this goes in the redzone */ - PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { EMIT(PPC_RAW_NOP()); EMIT(PPC_RAW_NOP()); } -#define BPF_TAILCALL_PROLOGUE_SIZE 8 - if (bpf_has_stack_frame(ctx)) { /* * We need a stack frame, but we don't necessarily need to @@ -96,10 +150,10 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_FUNC) { EMIT(PPC_RAW_MFLR(_R0)); - PPC_BPF_STL(0, 1, PPC_LR_STKOFF); + EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)); } - PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size))); } /* @@ -108,12 +162,12 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) * in the protected zone below the previous stack frame */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Setup frame pointer to point to the bpf stack area */ - if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP])) - EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1, + if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) + EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, STACK_FRAME_MIN_SIZE + ctx->stack_size)); } @@ -123,15 +177,15 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) - if (bpf_is_seen_register(ctx, b2p[i])) - PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); + if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) + EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { - EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size)); + EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size)); if (ctx->seen & SEEN_FUNC) { - PPC_BPF_LL(0, 1, PPC_LR_STKOFF); - EMIT(PPC_RAW_MTLR(0)); + EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); } } } @@ -141,42 +195,45 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); /* Move result to r3 */ - EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0])); + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); } -static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, - u64 func) +static int bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, u64 func) { -#ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to CTR */ - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - EMIT(PPC_RAW_MTCTR(12)); -#endif + unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; + long reladdr; + + if (WARN_ON_ONCE(!core_kernel_text(func_addr))) + return -EINVAL; + + reladdr = func_addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func); + return -ERANGE; + } + + EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr))); + EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr))); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } -void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func) { unsigned int i, ctx_idx = ctx->idx; + if (WARN_ON_ONCE(func && is_module_text_address(func))) + return -EINVAL; + + /* skip past descriptor if elf v1 */ + func += FUNCTION_DESCR_SIZE; + /* Load function address into r12 */ - PPC_LI64(12, func); + PPC_LI64(_R12, func); /* For bpf-to-bpf function calls, the callee's address is unknown * until the last extra pass. As seen above, we use PPC_LI64() to @@ -191,20 +248,10 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun for (i = ctx->idx - ctx_idx; i < 5; i++) EMIT(PPC_RAW_NOP()); -#ifdef PPC64_ELF_ABI_v1 - /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. - */ - PPC_BPF_LL(2, 12, 8); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(12, 12, 0); -#endif - - EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); + + return 0; } static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) @@ -215,54 +262,53 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o * r4/BPF_REG_2 - pointer to bpf_array * r5/BPF_REG_3 - index in bpf_array */ - int b2p_bpf_array = b2p[BPF_REG_2]; - int b2p_index = b2p[BPF_REG_3]; + int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); + int b2p_index = bpf_to_ppc(BPF_REG_3); + int bpf_tailcall_prologue_size = 8; + + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + bpf_tailcall_prologue_size += 4; /* skip past the toc load */ /* * if (index >= array->map.max_entries) * goto out; */ - EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); + EMIT(PPC_RAW_LWZ(bpf_to_ppc(TMP_REG_1), b2p_bpf_array, offsetof(struct bpf_array, map.max_entries))); EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31)); - EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1])); - PPC_BCC(COND_GE, out); + EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1))); + PPC_BCC_SHORT(COND_GE, out); /* - * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) * goto out; */ - PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); - EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT)); - PPC_BCC(COND_GT, out); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); + PPC_BCC_SHORT(COND_GE, out); /* * tail_call_cnt++; */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1)); - PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); /* prog = array->ptrs[index]; */ - EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array)); - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); + EMIT(PPC_RAW_ADD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), b2p_bpf_array)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_array, ptrs))); /* * if (prog == NULL) * goto out; */ - EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0)); - PPC_BCC(COND_EQ, out); + EMIT(PPC_RAW_CMPLDI(bpf_to_ppc(TMP_REG_1), 0)); + PPC_BCC_SHORT(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); -#ifdef PPC64_ELF_ABI_v1 - /* skip past the function descriptor */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], - FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE)); -#else - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE)); -#endif - EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1])); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), + FUNCTION_DESCR_SIZE + bpf_tailcall_prologue_size)); + EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1))); /* tear down stack, restore NVRs, ... */ bpf_jit_emit_common_epilogue(image, ctx); @@ -297,7 +343,7 @@ asm ( /* Assemble the body code between the prologue & epilogue */ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs, bool extra_pass) + u32 *addrs, int pass) { enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; @@ -309,8 +355,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * for (i = 0; i < flen; i++) { u32 code = insn[i].code; - u32 dst_reg = b2p[insn[i].dst_reg]; - u32 src_reg = b2p[insn[i].src_reg]; + u32 dst_reg = bpf_to_ppc(insn[i].dst_reg); + u32 src_reg = bpf_to_ppc(insn[i].src_reg); + u32 size = BPF_SIZE(code); + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); + u32 save_reg, ret_reg; s16 off = insn[i].off; s32 imm = insn[i].imm; bool func_addr_fixed; @@ -318,6 +368,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u64 imm64; u32 true_cond; u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -360,8 +411,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm >= -32768 && imm < 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ @@ -371,8 +422,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } else if (imm > -32768 && imm <= 32768) { EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm))); } else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -387,32 +438,28 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * if (imm >= -32768 && imm < 32768) EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm))); else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_ALU) - EMIT(PPC_RAW_MULW(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_MULD(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg)); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg)); break; @@ -431,35 +478,23 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * } } - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); switch (BPF_CLASS(code)) { case BPF_ALU: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg)); break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2], - dst_reg, - b2p[TMP_REG_1])); - EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], - b2p[TMP_REG_1], - b2p[TMP_REG_2])); - EMIT(PPC_RAW_SUB(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg)); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg)); } else - EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg)); break; } goto bpf_alu32_trunc; @@ -481,8 +516,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm))); else { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg)); } goto bpf_alu32_trunc; case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ @@ -493,8 +528,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm))); @@ -510,8 +545,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { /* Sign-extended */ - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1])); + PPC_LI32(tmp1_reg, imm); + EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg)); } else { if (IMM_L(imm)) EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm))); @@ -612,11 +647,11 @@ bpf_alu32_trunc: switch (imm) { case 16: /* Rotate 8 bits left & mask with 0x0000ff00 */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23)); /* Rotate 8 bits right & insert LSB to reg */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31)); /* Move result back to dst_reg */ - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 32: /* @@ -624,25 +659,29 @@ bpf_alu32_trunc: * 2 bytes are already in their final position * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) */ - EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31)); + EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31)); /* Rotate 24 bits and insert byte 1 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7)); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7)); /* Rotate 24 bits and insert byte 3 */ - EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23)); - EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); + EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23)); + EMIT(PPC_RAW_MR(dst_reg, tmp1_reg)); break; case 64: - /* - * Way easier and faster(?) to store the value - * into stack and then use ldbrx - * - * ctx->seen will be reliable in pass2, but - * the instructions generated will remain the - * same across all passes - */ - PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + /* Store the value to stack and then use byte-reverse loads */ + EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx))); + EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx))); + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg)); + } else { + EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg)); + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); + EMIT(PPC_RAW_LI(tmp2_reg, 4)); + EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg)); + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg)); + } break; } break; @@ -680,15 +719,14 @@ emit_clear: break; case STF_BARRIER_SYNC_ORI: EMIT(PPC_RAW_SYNC()); - EMIT(PPC_RAW_LD(b2p[TMP_REG_1], _R13, 0)); + EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0)); EMIT(PPC_RAW_ORI(_R31, _R31, 0)); break; case STF_BARRIER_FALLBACK: - EMIT(PPC_RAW_MFLR(b2p[TMP_REG_1])); - PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); - EMIT(PPC_RAW_MTCTR(12)); + ctx->seen |= SEEN_FUNC; + PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(_R12)); EMIT(PPC_RAW_BCTRL()); - EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); break; case STF_BARRIER_NONE: break; @@ -701,76 +739,122 @@ emit_clear: case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STB(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm)); - src_reg = b2p[TMP_REG_1]; + EMIT(PPC_RAW_LI(tmp1_reg, imm)); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STH(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; } EMIT(PPC_RAW_STW(src_reg, dst_reg, off)); break; case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ if (BPF_CLASS(code) == BPF_ST) { - PPC_LI32(b2p[TMP_REG_1], imm); - src_reg = b2p[TMP_REG_1]; + PPC_LI32(tmp1_reg, imm); + src_reg = tmp1_reg; + } + if (off % 4) { + EMIT(PPC_RAW_LI(tmp2_reg, off)); + EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg)); + } else { + EMIT(PPC_RAW_STD(src_reg, dst_reg, off)); } - PPC_BPF_STL(src_reg, dst_reg, off); break; /* * BPF_STX ATOMIC (atomic ops) */ case BPF_STX | BPF_ATOMIC | BPF_W: - if (imm != BPF_ADD) { - pr_err_ratelimited( - "eBPF filter atomic op code %02x (@%d) unsupported\n", - code, i); - return -ENOTSUPP; - } - - /* *(u32 *)(dst + off) += src */ + case BPF_STX | BPF_ATOMIC | BPF_DW: + save_reg = tmp2_reg; + ret_reg = src_reg; - /* Get EA into TMP_REG_1 */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); + /* Get offset into TMP_REG_1 */ + EMIT(PPC_RAW_LI(tmp1_reg, off)); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ - EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); - /* add value from src_reg into this */ - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); - /* store result back */ - EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); - /* we're done if this succeeded */ - PPC_BCC_SHORT(COND_NE, tmp_idx); - break; - case BPF_STX | BPF_ATOMIC | BPF_DW: - if (imm != BPF_ADD) { + if (size == BPF_DW) + EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0)); + else + EMIT(PPC_RAW_LWARX(tmp2_reg, tmp1_reg, dst_reg, 0)); + + /* Save old value in _R0 */ + if (imm & BPF_FETCH) + EMIT(PPC_RAW_MR(_R0, tmp2_reg)); + + switch (imm) { + case BPF_ADD: + case BPF_ADD | BPF_FETCH: + EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_AND: + case BPF_AND | BPF_FETCH: + EMIT(PPC_RAW_AND(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_OR: + case BPF_OR | BPF_FETCH: + EMIT(PPC_RAW_OR(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_XOR: + case BPF_XOR | BPF_FETCH: + EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg)); + break; + case BPF_CMPXCHG: + /* + * Return old value in BPF_REG_0 for BPF_CMPXCHG & + * in src_reg for other cases. + */ + ret_reg = bpf_to_ppc(BPF_REG_0); + + /* Compare with old value in BPF_R0 */ + if (size == BPF_DW) + EMIT(PPC_RAW_CMPD(bpf_to_ppc(BPF_REG_0), tmp2_reg)); + else + EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), tmp2_reg)); + /* Don't set if different from old value */ + PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4); + fallthrough; + case BPF_XCHG: + save_reg = src_reg; + break; + default: pr_err_ratelimited( "eBPF filter atomic op code %02x (@%d) unsupported\n", code, i); - return -ENOTSUPP; + return -EOPNOTSUPP; } - /* *(u64 *)(dst + off) += src */ - EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off)); - tmp_idx = ctx->idx * 4; - EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0)); - EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg)); - EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1])); + /* store new value */ + if (size == BPF_DW) + EMIT(PPC_RAW_STDCX(save_reg, tmp1_reg, dst_reg)); + else + EMIT(PPC_RAW_STWCX(save_reg, tmp1_reg, dst_reg)); + /* we're done if this succeeded */ PPC_BCC_SHORT(COND_NE, tmp_idx); + + if (imm & BPF_FETCH) { + EMIT(PPC_RAW_MR(ret_reg, _R0)); + /* + * Skip unnecessary zero-extension for 32-bit cmpxchg. + * For context, see commit 39491867ace5. + */ + if (size != BPF_DW && imm == BPF_CMPXCHG && + insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + } break; /* @@ -778,25 +862,70 @@ emit_clear: */ /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_B: - EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: - EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: - EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); - if (insn_is_zext(&insn[i + 1])) - addrs[++i] = ctx->idx * 4; - break; + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: - PPC_BPF_LL(dst_reg, src_reg, off); + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* + * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid + * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM + * load only if addr is kernel address (see is_kernel_addr()), otherwise + * set dst_reg=0 and move on. + */ + if (BPF_MODE(code) == BPF_PROBE_MEM) { + EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off)); + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) + PPC_LI64(tmp2_reg, 0x8000000000000000ul); + else /* BOOK3S_64 */ + PPC_LI64(tmp2_reg, PAGE_OFFSET); + EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg)); + PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4); + EMIT(PPC_RAW_LI(dst_reg, 0)); + /* + * Check if 'off' is word aligned for BPF_DW, because + * we might generate two instructions. + */ + if (BPF_SIZE(code) == BPF_DW && (off & 3)) + PPC_JMP((ctx->idx + 3) * 4); + else + PPC_JMP((ctx->idx + 2) * 4); + } + + switch (size) { + case BPF_B: + EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off)); + break; + case BPF_H: + EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off)); + break; + case BPF_W: + EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off)); + break; + case BPF_DW: + if (off % 4) { + EMIT(PPC_RAW_LI(tmp1_reg, off)); + EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg)); + } else { + EMIT(PPC_RAW_LD(dst_reg, src_reg, off)); + } + break; + } + + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; + + if (BPF_MODE(code) == BPF_PROBE_MEM) { + ret = bpf_add_extable_entry(fp, image, pass, ctx, ctx->idx - 1, + 4, dst_reg); + if (ret) + return ret; + } break; /* @@ -806,9 +935,13 @@ emit_clear: case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ imm64 = ((u64)(u32) insn[i].imm) | (((u64)(u32) insn[i+1].imm) << 32); + tmp_idx = ctx->idx; + PPC_LI64(dst_reg, imm64); + /* padding to allow full 5 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 5; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; - PPC_LI64(dst_reg, imm64); break; /* @@ -820,8 +953,11 @@ emit_clear: * the epilogue. If we _are_ the last instruction, * we'll just fall through to the epilogue. */ - if (i != flen - 1) - PPC_JMP(exit_addr); + if (i != flen - 1) { + ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr); + if (ret) + return ret; + } /* else fall through to the epilogue */ break; @@ -831,17 +967,21 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + ret = bpf_jit_get_func_addr(fp, &insn[i], false, &func_addr, &func_addr_fixed); if (ret < 0) return ret; if (func_addr_fixed) - bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_hlp(image, ctx, func_addr); else - bpf_jit_emit_func_call_rel(image, ctx, func_addr); + ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + if (ret) + return ret; + /* move return value from r3 to BPF_REG_0 */ - EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3)); + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3)); break; /* @@ -947,14 +1087,10 @@ cond_branch: case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg, - src_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg)); } else { - int tmp_reg = b2p[TMP_REG_1]; - - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0, - 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31)); } break; case BPF_JMP | BPF_JNE | BPF_K: @@ -983,14 +1119,12 @@ cond_branch: EMIT(PPC_RAW_CMPLDI(dst_reg, imm)); } else { /* sign-extending load */ - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); /* ... but unsigned comparison */ if (is_jmp32) - EMIT(PPC_RAW_CMPLW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPLD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg)); } break; } @@ -1015,13 +1149,11 @@ cond_branch: else EMIT(PPC_RAW_CMPDI(dst_reg, imm)); } else { - PPC_LI32(b2p[TMP_REG_1], imm); + PPC_LI32(tmp1_reg, imm); if (is_jmp32) - EMIT(PPC_RAW_CMPW(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg)); else - EMIT(PPC_RAW_CMPD(dst_reg, - b2p[TMP_REG_1])); + EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg)); } break; } @@ -1030,19 +1162,16 @@ cond_branch: /* andi does not sign-extend the immediate */ if (imm >= 0 && imm < 32768) /* PPC_ANDI is _only/always_ dot-form */ - EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm)); + EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm)); else { - int tmp_reg = b2p[TMP_REG_1]; - - PPC_LI32(tmp_reg, imm); + PPC_LI32(tmp1_reg, imm); if (BPF_CLASS(code) == BPF_JMP) { - EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg, - tmp_reg)); + EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, + tmp1_reg)); } else { - EMIT(PPC_RAW_AND(tmp_reg, dst_reg, - tmp_reg)); - EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, - 0, 0, 31)); + EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg)); + EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, + 0, 0, 31)); } } break; |