aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/s390/net/bpf_jit_comp.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/net/bpf_jit_comp.c')
-rw-r--r--arch/s390/net/bpf_jit_comp.c715
1 files changed, 678 insertions, 37 deletions
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index af35052d06ed..d0846ba818ee 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -30,6 +30,7 @@
#include <asm/facility.h>
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
+#include <asm/text-patching.h>
#include "bpf_jit.h"
struct bpf_jit {
@@ -50,12 +51,13 @@ struct bpf_jit {
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int excnt; /* Number of exception table entries */
+ int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
+ int prologue_plt; /* Start of prologue hotpatch PLT */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
#define SEEN_LITERAL BIT(1) /* code uses literals */
#define SEEN_FUNC BIT(2) /* calls C functions */
-#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
@@ -68,6 +70,10 @@ struct bpf_jit {
#define REG_0 REG_W0 /* Register 0 */
#define REG_1 REG_W1 /* Register 1 */
#define REG_2 BPF_REG_1 /* Register 2 */
+#define REG_3 BPF_REG_2 /* Register 3 */
+#define REG_4 BPF_REG_3 /* Register 4 */
+#define REG_7 BPF_REG_6 /* Register 7 */
+#define REG_8 BPF_REG_7 /* Register 8 */
#define REG_14 BPF_REG_0 /* Register 14 */
/*
@@ -507,20 +513,58 @@ static void bpf_skip(struct bpf_jit *jit, int size)
}
/*
+ * PLT for hotpatchable calls. The calling convention is the same as for the
+ * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
+ */
+extern const char bpf_plt[];
+extern const char bpf_plt_ret[];
+extern const char bpf_plt_target[];
+extern const char bpf_plt_end[];
+#define BPF_PLT_SIZE 32
+asm(
+ ".pushsection .rodata\n"
+ " .align 8\n"
+ "bpf_plt:\n"
+ " lgrl %r0,bpf_plt_ret\n"
+ " lgrl %r1,bpf_plt_target\n"
+ " br %r1\n"
+ " .align 8\n"
+ "bpf_plt_ret: .quad 0\n"
+ "bpf_plt_target: .quad 0\n"
+ "bpf_plt_end:\n"
+ " .popsection\n"
+);
+
+static void bpf_jit_plt(void *plt, void *ret, void *target)
+{
+ memcpy(plt, bpf_plt, BPF_PLT_SIZE);
+ *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
+ *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target;
+}
+
+/*
* Emit function prologue
*
* Save registers and create stack frame if necessary.
- * See stack frame layout desription in "bpf_jit.h"!
+ * See stack frame layout description in "bpf_jit.h"!
*/
-static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
+static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
+ u32 stack_depth)
{
- if (jit->seen & SEEN_TAIL_CALL) {
+ /* No-op for hotpatching */
+ /* brcl 0,prologue_plt */
+ EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
+ jit->prologue_plt_ret = jit->prg;
+
+ if (fp->aux->func_idx == 0) {
+ /* Initialize the tail call counter in the main program. */
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
/*
- * There are no tail calls. Insert nops in order to have
- * tail_call_start at a predictable offset.
+ * Skip the tail call counter initialization in subprograms.
+ * Insert nops in order to have tail_call_start at a
+ * predictable offset.
*/
bpf_skip(jit, 6);
}
@@ -558,6 +602,43 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
}
/*
+ * Emit an expoline for a jump that follows
+ */
+static void emit_expoline(struct bpf_jit *jit)
+{
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+}
+
+/*
+ * Emit __s390_indirect_jump_r1 thunk if necessary
+ */
+static void emit_r1_thunk(struct bpf_jit *jit)
+{
+ if (nospec_uses_trampoline()) {
+ jit->r1_thunk_ip = jit->prg;
+ emit_expoline(jit);
+ /* br %r1 */
+ _EMIT2(0x07f1);
+ }
+}
+
+/*
+ * Call r1 either directly or via __s390_indirect_jump_r1 thunk
+ */
+static void call_r1(struct bpf_jit *jit)
+{
+ if (nospec_uses_trampoline())
+ /* brasl %r14,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ else
+ /* basr %r14,%r1 */
+ EMIT2(0x0d00, REG_14, REG_1);
+}
+
+/*
* Function epilogue
*/
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
@@ -570,25 +651,20 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
if (nospec_uses_trampoline()) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
+ emit_expoline(jit);
}
/* br %r14 */
_EMIT2(0x07fe);
- if ((nospec_uses_trampoline()) &&
- (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
- jit->r1_thunk_ip = jit->prg;
- /* Generate __s390_indirect_jump_r1 thunk */
- /* exrl %r0,.+10 */
- EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
- /* j . */
- EMIT4_PCREL(0xa7f40000, 0);
- /* br %r1 */
- _EMIT2(0x07f1);
- }
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
+ emit_r1_thunk(jit);
+
+ jit->prg = ALIGN(jit->prg, 8);
+ jit->prologue_plt = jit->prg;
+ if (jit->prg_buf)
+ bpf_jit_plt(jit->prg_buf + jit->prg,
+ jit->prg_buf + jit->prologue_plt_ret, NULL);
+ jit->prg += BPF_PLT_SIZE;
}
static int get_probe_mem_regno(const u8 *insn)
@@ -663,6 +739,34 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
}
/*
+ * Sign-extend the register if necessary
+ */
+static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
+{
+ if (!(flags & BTF_FMODEL_SIGNED_ARG))
+ return 0;
+
+ switch (size) {
+ case 1:
+ /* lgbr %r,%r */
+ EMIT4(0xb9060000, r, r);
+ return 0;
+ case 2:
+ /* lghr %r,%r */
+ EMIT4(0xb9070000, r, r);
+ return 0;
+ case 4:
+ /* lgfr %r,%r */
+ EMIT4(0xb9140000, r, r);
+ return 0;
+ case 8:
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+/*
* Compile one eBPF instruction into s390x code
*
* NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
@@ -1297,9 +1401,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
case BPF_JMP | BPF_CALL:
{
- u64 func;
+ const struct btf_func_model *m;
bool func_addr_fixed;
- int ret;
+ int j, ret;
+ u64 func;
ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
&func, &func_addr_fixed);
@@ -1308,15 +1413,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
+ /*
+ * Copy the tail call counter to where the callee expects it.
+ *
+ * Note 1: The callee can increment the tail call counter, but
+ * we do not load it back, since the x86 JIT does not do this
+ * either.
+ *
+ * Note 2: We assume that the verifier does not let us call the
+ * main program, which clears the tail call counter on entry.
+ */
+ /* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
+ _EMIT6(0xd203f000 | STK_OFF_TCCNT,
+ 0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));
+
+ /* Sign-extend the kfunc arguments. */
+ if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+ m = bpf_jit_find_kfunc_model(fp, insn);
+ if (!m)
+ return -1;
+
+ for (j = 0; j < m->nr_args; j++) {
+ if (sign_extend(jit, BPF_REG_1 + j,
+ m->arg_size[j],
+ m->arg_flags[j]))
+ return -1;
+ }
+ }
+
/* lgrl %w1,func */
EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
- if (nospec_uses_trampoline()) {
- /* brasl %r14,__s390_indirect_jump_r1 */
- EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
- } else {
- /* basr %r14,%w1 */
- EMIT2(0x0d00, REG_14, REG_W1);
- }
+ /* %r1() */
+ call_r1(jit);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
break;
@@ -1329,10 +1457,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* B1: pointer to ctx
* B2: pointer to bpf_array
* B3: index in bpf_array
- */
- jit->seen |= SEEN_TAIL_CALL;
-
- /*
+ *
* if (index >= array->map.max_entries)
* goto out;
*/
@@ -1393,8 +1518,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* lg %r1,bpf_func(%r1) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
offsetof(struct bpf_prog, bpf_func));
- /* bc 0xf,tail_call_start(%r1) */
- _EMIT4(0x47f01000 + jit->tail_call_start);
+ if (nospec_uses_trampoline()) {
+ jit->seen |= SEEN_FUNC;
+ /* aghi %r1,tail_call_start */
+ EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
+ /* brcl 0xf,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
+ } else {
+ /* bc 0xf,tail_call_start(%r1) */
+ _EMIT4(0x47f01000 + jit->tail_call_start);
+ }
/* out: */
if (jit->prg_buf) {
*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
@@ -1688,7 +1821,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
jit->prg = 0;
jit->excnt = 0;
- bpf_jit_prologue(jit, stack_depth);
+ bpf_jit_prologue(jit, fp, stack_depth);
if (bpf_set_addr(jit, 0) < 0)
return -1;
for (i = 0; i < fp->len; i += insn_count) {
@@ -1768,6 +1901,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_jit jit;
int pass;
+ if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
+ return orig_fp;
+
if (!fp->jit_requested)
return orig_fp;
@@ -1859,3 +1995,508 @@ out:
tmp : orig_fp);
return fp;
}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ struct {
+ u16 opc;
+ s32 disp;
+ } __packed insn;
+ char expected_plt[BPF_PLT_SIZE];
+ char current_plt[BPF_PLT_SIZE];
+ char *plt;
+ int err;
+
+ /* Verify the branch to be patched. */
+ err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
+ if (err < 0)
+ return err;
+ if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
+ return -EINVAL;
+
+ if (t == BPF_MOD_JUMP &&
+ insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
+ /*
+ * The branch already points to the destination,
+ * there is no PLT.
+ */
+ } else {
+ /* Verify the PLT. */
+ plt = (char *)ip + (insn.disp << 1);
+ err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
+ if (err < 0)
+ return err;
+ bpf_jit_plt(expected_plt, (char *)ip + 6, old_addr);
+ if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
+ return -EINVAL;
+ /* Adjust the call address. */
+ s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
+ &new_addr, sizeof(void *));
+ }
+
+ /* Adjust the mask of the branch. */
+ insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
+ s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);
+
+ /* Make the new code visible to the other CPUs. */
+ text_poke_sync_lock();
+
+ return 0;
+}
+
+struct bpf_tramp_jit {
+ struct bpf_jit common;
+ int orig_stack_args_off;/* Offset of arguments placed on stack by the
+ * func_addr's original caller
+ */
+ int stack_size; /* Trampoline stack size */
+ int stack_args_off; /* Offset of stack arguments for calling
+ * func_addr, has to be at the top
+ */
+ int reg_args_off; /* Offset of register arguments for calling
+ * func_addr
+ */
+ int ip_off; /* For bpf_get_func_ip(), has to be at
+ * (ctx - 16)
+ */
+ int arg_cnt_off; /* For bpf_get_func_arg_cnt(), has to be at
+ * (ctx - 8)
+ */
+ int bpf_args_off; /* Offset of BPF_PROG context, which consists
+ * of BPF arguments followed by return value
+ */
+ int retval_off; /* Offset of return value (see above) */
+ int r7_r8_off; /* Offset of saved %r7 and %r8, which are used
+ * for __bpf_prog_enter() return value and
+ * func_addr respectively
+ */
+ int r14_off; /* Offset of saved %r14 */
+ int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */
+ int do_fexit; /* do_fexit: label */
+};
+
+static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
+{
+ /* llihf %dst_reg,val_hi */
+ EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
+ /* oilf %rdst_reg,val_lo */
+ EMIT6_IMM(0xc00d0000, dst_reg, val);
+}
+
+static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
+ const struct btf_func_model *m,
+ struct bpf_tramp_link *tlink, bool save_ret)
+{
+ struct bpf_jit *jit = &tjit->common;
+ int cookie_off = tjit->run_ctx_off +
+ offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+ struct bpf_prog *p = tlink->link.prog;
+ int patch;
+
+ /*
+ * run_ctx.cookie = tlink->cookie;
+ */
+
+ /* %r0 = tlink->cookie */
+ load_imm64(jit, REG_W0, tlink->cookie);
+ /* stg %r0,cookie_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);
+
+ /*
+ * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
+ * goto skip;
+ */
+
+ /* %r1 = __bpf_prog_enter */
+ load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
+ /* %r2 = p */
+ load_imm64(jit, REG_2, (u64)p);
+ /* la %r3,run_ctx_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
+ /* %r1() */
+ call_r1(jit);
+ /* ltgr %r7,%r2 */
+ EMIT4(0xb9020000, REG_7, REG_2);
+ /* brcl 8,skip */
+ patch = jit->prg;
+ EMIT6_PCREL_RILC(0xc0040000, 8, 0);
+
+ /*
+ * retval = bpf_func(args, p->insnsi);
+ */
+
+ /* %r1 = p->bpf_func */
+ load_imm64(jit, REG_1, (u64)p->bpf_func);
+ /* la %r2,bpf_args_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
+ /* %r3 = p->insnsi */
+ if (!p->jited)
+ load_imm64(jit, REG_3, (u64)p->insnsi);
+ /* %r1() */
+ call_r1(jit);
+ /* stg %r2,retval_off(%r15) */
+ if (save_ret) {
+ if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
+ return -1;
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+ }
+
+ /* skip: */
+ if (jit->prg_buf)
+ *(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;
+
+ /*
+ * __bpf_prog_exit(p, start, &run_ctx);
+ */
+
+ /* %r1 = __bpf_prog_exit */
+ load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
+ /* %r2 = p */
+ load_imm64(jit, REG_2, (u64)p);
+ /* lgr %r3,%r7 */
+ EMIT4(0xb9040000, REG_3, REG_7);
+ /* la %r4,run_ctx_off(%r15) */
+ EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
+ /* %r1() */
+ call_r1(jit);
+
+ return 0;
+}
+
+static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
+{
+ int stack_offset = tjit->stack_size;
+
+ tjit->stack_size += size;
+ return stack_offset;
+}
+
+/* ABI uses %r2 - %r6 for parameter passing. */
+#define MAX_NR_REG_ARGS 5
+
+/* The "L" field of the "mvc" instruction is 8 bits. */
+#define MAX_MVC_SIZE 256
+#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))
+
+/* -mfentry generates a 6-byte nop on s390x. */
+#define S390X_PATCH_SIZE 6
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+ struct bpf_tramp_jit *tjit,
+ const struct btf_func_model *m,
+ u32 flags,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ int nr_bpf_args, nr_reg_args, nr_stack_args;
+ struct bpf_jit *jit = &tjit->common;
+ int arg, bpf_arg_off;
+ int i, j;
+
+ /* Support as many stack arguments as "mvc" instruction can handle. */
+ nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
+ nr_stack_args = m->nr_args - nr_reg_args;
+ if (nr_stack_args > MAX_NR_STACK_ARGS)
+ return -ENOTSUPP;
+
+ /* Return to %r14, since func_addr and %r0 are not available. */
+ if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
+ flags |= BPF_TRAMP_F_SKIP_FRAME;
+
+ /*
+ * Compute how many arguments we need to pass to BPF programs.
+ * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
+ * smaller are packed into 1 or 2 registers; larger arguments are
+ * passed via pointers.
+ * In s390x ABI, arguments that are 8 bytes or smaller are packed into
+ * a register; larger arguments are passed via pointers.
+ * We need to deal with this difference.
+ */
+ nr_bpf_args = 0;
+ for (i = 0; i < m->nr_args; i++) {
+ if (m->arg_size[i] <= 8)
+ nr_bpf_args += 1;
+ else if (m->arg_size[i] <= 16)
+ nr_bpf_args += 2;
+ else
+ return -ENOTSUPP;
+ }
+
+ /*
+ * Calculate the stack layout.
+ */
+
+ /* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */
+ tjit->stack_size = STACK_FRAME_OVERHEAD;
+ tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
+ tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
+ tjit->ip_off = alloc_stack(tjit, sizeof(u64));
+ tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
+ tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
+ tjit->retval_off = alloc_stack(tjit, sizeof(u64));
+ tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
+ tjit->r14_off = alloc_stack(tjit, sizeof(u64));
+ tjit->run_ctx_off = alloc_stack(tjit,
+ sizeof(struct bpf_tramp_run_ctx));
+ /* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
+ tjit->stack_size -= STACK_FRAME_OVERHEAD;
+ tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
+
+ /* aghi %r15,-stack_size */
+ EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+ /* stmg %r2,%rN,fwd_reg_args_off(%r15) */
+ if (nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ for (i = 0, j = 0; i < m->nr_args; i++) {
+ if (i < MAX_NR_REG_ARGS)
+ arg = REG_2 + i;
+ else
+ arg = tjit->orig_stack_args_off +
+ (i - MAX_NR_REG_ARGS) * sizeof(u64);
+ bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
+ if (m->arg_size[i] <= 8) {
+ if (i < MAX_NR_REG_ARGS)
+ /* stg %arg,bpf_arg_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
+ REG_0, REG_15, bpf_arg_off);
+ else
+ /* mvc bpf_arg_off(8,%r15),arg(%r15) */
+ _EMIT6(0xd207f000 | bpf_arg_off,
+ 0xf000 | arg);
+ j += 1;
+ } else {
+ if (i < MAX_NR_REG_ARGS) {
+ /* mvc bpf_arg_off(16,%r15),0(%arg) */
+ _EMIT6(0xd20ff000 | bpf_arg_off,
+ reg2hex[arg] << 12);
+ } else {
+ /* lg %r1,arg(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
+ REG_15, arg);
+ /* mvc bpf_arg_off(16,%r15),0(%r1) */
+ _EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
+ }
+ j += 2;
+ }
+ }
+ /* stmg %r7,%r8,r7_r8_off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
+ tjit->r7_r8_off);
+ /* stg %r14,r14_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);
+
+ if (flags & BPF_TRAMP_F_ORIG_STACK) {
+ /*
+ * The ftrace trampoline puts the return address (which is the
+ * address of the original function + S390X_PATCH_SIZE) into
+ * %r0; see ftrace_shared_hotpatch_trampoline_br and
+ * ftrace_init_nop() for details.
+ */
+
+ /* lgr %r8,%r0 */
+ EMIT4(0xb9040000, REG_8, REG_0);
+ } else {
+ /* %r8 = func_addr + S390X_PATCH_SIZE */
+ load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
+ }
+
+ /*
+ * ip = func_addr;
+ * arg_cnt = m->nr_args;
+ */
+
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ /* %r0 = func_addr */
+ load_imm64(jit, REG_0, (u64)func_addr);
+ /* stg %r0,ip_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+ tjit->ip_off);
+ }
+ /* lghi %r0,nr_bpf_args */
+ EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
+ /* stg %r0,arg_cnt_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
+ tjit->arg_cnt_off);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ /*
+ * __bpf_tramp_enter(im);
+ */
+
+ /* %r1 = __bpf_tramp_enter */
+ load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
+ /* %r2 = im */
+ load_imm64(jit, REG_2, (u64)im);
+ /* %r1() */
+ call_r1(jit);
+ }
+
+ for (i = 0; i < fentry->nr_links; i++)
+ if (invoke_bpf_prog(tjit, m, fentry->links[i],
+ flags & BPF_TRAMP_F_RET_FENTRY_RET))
+ return -EINVAL;
+
+ if (fmod_ret->nr_links) {
+ /*
+ * retval = 0;
+ */
+
+ /* xc retval_off(8,%r15),retval_off(%r15) */
+ _EMIT6(0xd707f000 | tjit->retval_off,
+ 0xf000 | tjit->retval_off);
+
+ for (i = 0; i < fmod_ret->nr_links; i++) {
+ if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
+ return -EINVAL;
+
+ /*
+ * if (retval)
+ * goto do_fexit;
+ */
+
+ /* ltg %r0,retval_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
+ tjit->retval_off);
+ /* brcl 7,do_fexit */
+ EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
+ }
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ /*
+ * retval = func_addr(args);
+ */
+
+ /* lmg %r2,%rN,reg_args_off(%r15) */
+ if (nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ /* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
+ if (nr_stack_args)
+ _EMIT6(0xd200f000 |
+ (nr_stack_args * sizeof(u64) - 1) << 16 |
+ tjit->stack_args_off,
+ 0xf000 | tjit->orig_stack_args_off);
+ /* lgr %r1,%r8 */
+ EMIT4(0xb9040000, REG_1, REG_8);
+ /* %r1() */
+ call_r1(jit);
+ /* stg %r2,retval_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+
+ im->ip_after_call = jit->prg_buf + jit->prg;
+
+ /*
+ * The following nop will be patched by bpf_tramp_image_put().
+ */
+
+ /* brcl 0,im->ip_epilogue */
+ EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
+ }
+
+ /* do_fexit: */
+ tjit->do_fexit = jit->prg;
+ for (i = 0; i < fexit->nr_links; i++)
+ if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ im->ip_epilogue = jit->prg_buf + jit->prg;
+
+ /*
+ * __bpf_tramp_exit(im);
+ */
+
+ /* %r1 = __bpf_tramp_exit */
+ load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
+ /* %r2 = im */
+ load_imm64(jit, REG_2, (u64)im);
+ /* %r1() */
+ call_r1(jit);
+ }
+
+ /* lmg %r2,%rN,reg_args_off(%r15) */
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
+ REG_2 + (nr_reg_args - 1), REG_15,
+ tjit->reg_args_off);
+ /* lgr %r1,%r8 */
+ if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
+ EMIT4(0xb9040000, REG_1, REG_8);
+ /* lmg %r7,%r8,r7_r8_off(%r15) */
+ EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
+ tjit->r7_r8_off);
+ /* lg %r14,r14_off(%r15) */
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
+ /* lg %r2,retval_off(%r15) */
+ if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
+ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
+ tjit->retval_off);
+ /* aghi %r15,stack_size */
+ EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
+ /* Emit an expoline for the following indirect jump. */
+ if (nospec_uses_trampoline())
+ emit_expoline(jit);
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* br %r14 */
+ _EMIT2(0x07fe);
+ else
+ /* br %r1 */
+ _EMIT2(0x07f1);
+
+ emit_r1_thunk(jit);
+
+ return 0;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ struct bpf_tramp_jit tjit;
+ int ret;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ /* Compute offsets, check whether the code fits. */
+ memset(&tjit, 0, sizeof(tjit));
+ } else {
+ /* Generate the code. */
+ tjit.common.prg = 0;
+ tjit.common.prg_buf = image;
+ }
+ ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+ tlinks, func_addr);
+ if (ret < 0)
+ return ret;
+ if (tjit.common.prg > (char *)image_end - (char *)image)
+ /*
+ * Use the same error code as for exceeding
+ * BPF_MAX_TRAMP_LINKS.
+ */
+ return -E2BIG;
+ }
+
+ return ret;
+}
+
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+ return true;
+}