From a13cff318cafbd493b8d5d679e5f3f761084c4fe Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:07:14 +0200 Subject: s390/mm: recfactor global pgste updates Replace the s390 specific page table walker for the pgste updates with a call to the common code walk_page_range function. There are now two pte modification functions, one for the reset of the CMMA state and another one for the initialization of the storage keys. Signed-off-by: Dominik Dingel Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390/include/asm/pgtable.h') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 57c882761dea..4399be1aaeff 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1747,6 +1747,7 @@ extern int vmem_add_mapping(unsigned long start, unsigned long size); extern int vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); extern void s390_enable_skey(void); +extern void s390_reset_cmma(struct mm_struct *mm); /* * No page table caches to initialise -- cgit v1.2.3-59-g8ed1b From 2faee8ff9dc6f4bfe46f6d2d110add858140fb20 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:08:38 +0200 Subject: s390/mm: prevent and break zero page mappings in case of storage keys As soon as storage keys are enabled we need to stop working on zero page mappings to prevent inconsistencies between storage keys and pgste. Otherwise following data corruption could happen: 1) guest enables storage key 2) guest sets storage key for not mapped page X -> change goes to PGSTE 3) guest reads from page X -> as X was not dirty before, the page will be zero page backed, storage key from PGSTE for X will go to storage key for zero page 4) guest sets storage key for not mapped page Y (same logic as above 5) guest reads from page Y -> as Y was not dirty before, the page will be zero page backed, storage key from PGSTE for Y will got to storage key for zero page overwriting storage key for X While holding the mmap sem, we are safe against changes on entries we already fixed, as every fault would need to take the mmap_sem (read). Other vCPUs executing storage key instructions will get a one time interception and be serialized also with mmap_sem. Signed-off-by: Dominik Dingel Reviewed-by: Paolo Bonzini Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 5 +++++ arch/s390/mm/pgtable.c | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/s390/include/asm/pgtable.h') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4399be1aaeff..df2e7f14ffb7 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -479,6 +479,11 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +/* + * In the case that a guest uses storage keys + * faults should no longer be backed by zero pages + */ +#define mm_forbids_zeropage mm_use_skey static inline int mm_use_skey(struct mm_struct *mm) { #ifdef CONFIG_PGSTE diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 019afdf50b1a..0f1e9ff6bc12 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1256,6 +1256,15 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr, pgste_t pgste; pgste = pgste_get_lock(pte); + /* + * Remove all zero page mappings, + * after establishing a policy to forbid zero page mappings + * following faults for that page will get fresh anonymous pages + */ + if (is_zero_pfn(pte_pfn(*pte))) { + ptep_flush_direct(walk->mm, addr, pte); + pte_val(*pte) = _PAGE_INVALID; + } /* Clear storage key */ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); @@ -1274,9 +1283,11 @@ void s390_enable_skey(void) down_write(&mm->mmap_sem); if (mm_use_skey(mm)) goto out_up; + + mm->context.use_skey = 1; + walk.mm = mm; walk_page_range(0, TASK_SIZE, &walk); - mm->context.use_skey = 1; out_up: up_write(&mm->mmap_sem); -- cgit v1.2.3-59-g8ed1b From 3ac8e38015d4fd1c12e4e048a01a9f059a2053a2 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:09:17 +0200 Subject: s390/mm: disable KSM for storage key enabled pages When storage keys are enabled unmerge already merged pages and prevent new pages from being merged. Signed-off-by: Dominik Dingel Acked-by: Christian Borntraeger Reviewed-by: Paolo Bonzini Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/kvm/priv.c | 17 ++++++++++++----- arch/s390/mm/pgtable.c | 16 +++++++++++++++- 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'arch/s390/include/asm/pgtable.h') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index df2e7f14ffb7..00d460742e1e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1751,7 +1751,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern int vmem_add_mapping(unsigned long start, unsigned long size); extern int vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); -extern void s390_enable_skey(void); +extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); /* diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 72bb2dd8b9cd..f47cb0c6d906 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -156,21 +156,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) return 0; } -static void __skey_check_enable(struct kvm_vcpu *vcpu) +static int __skey_check_enable(struct kvm_vcpu *vcpu) { + int rc = 0; if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) - return; + return rc; - s390_enable_skey(); + rc = s390_enable_skey(); trace_kvm_s390_skey_related_inst(vcpu); vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + return rc; } static int handle_skey(struct kvm_vcpu *vcpu) { - __skey_check_enable(vcpu); + int rc = __skey_check_enable(vcpu); + if (rc) + return rc; vcpu->stat.instruction_storage_key++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) @@ -683,7 +687,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { - __skey_check_enable(vcpu); + int rc = __skey_check_enable(vcpu); + + if (rc) + return rc; if (set_guest_storage_key(current->mm, useraddr, vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0f1e9ff6bc12..b1871d39e46e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include @@ -1275,22 +1277,34 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr, return 0; } -void s390_enable_skey(void) +int s390_enable_skey(void) { struct mm_walk walk = { .pte_entry = __s390_enable_skey }; struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc = 0; down_write(&mm->mmap_sem); if (mm_use_skey(mm)) goto out_up; mm->context.use_skey = 1; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + mm->context.use_skey = 0; + rc = -ENOMEM; + goto out_up; + } + } + mm->def_flags &= ~VM_MERGEABLE; walk.mm = mm; walk_page_range(0, TASK_SIZE, &walk); out_up: up_write(&mm->mmap_sem); + return rc; } EXPORT_SYMBOL_GPL(s390_enable_skey); -- cgit v1.2.3-59-g8ed1b From c933146a5e41e42ea3eb4f34fa02e201da3f068e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 15 Oct 2014 12:17:38 +0200 Subject: s390/ftrace,kprobes: allow to patch first instruction If the function tracer is enabled, allow to set kprobes on the first instruction of a function (which is the function trace caller): If no kprobe is set handling of enabling and disabling function tracing of a function simply patches the first instruction. Either it is a nop (right now it's an unconditional branch, which skips the mcount block), or it's a branch to the ftrace_caller() function. If a kprobe is being placed on a function tracer calling instruction we encode if we actually have a nop or branch in the remaining bytes after the breakpoint instruction (illegal opcode). This is possible, since the size of the instruction used for the nop and branch is six bytes, while the size of the breakpoint is only two bytes. Therefore the first two bytes contain the illegal opcode and the last four bytes contain either "0" for nop or "1" for branch. The kprobes code will then execute/simulate the correct instruction. Instruction patching for kprobes and function tracer is always done with stop_machine(). Therefore we don't have any races where an instruction is patched concurrently on a different cpu. Besides that also the program check handler which executes the function trace caller instruction won't be executed concurrently to any stop_machine() execution. This allows to keep full fault based kprobes handling which generates correct pt_regs contents automatically. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 52 ++++++++++++++-- arch/s390/include/asm/kprobes.h | 1 + arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/pgtable.h | 12 ++++ arch/s390/kernel/asm-offsets.c | 1 - arch/s390/kernel/early.c | 4 -- arch/s390/kernel/ftrace.c | 132 +++++++++++++++++++++++++--------------- arch/s390/kernel/kprobes.c | 92 ++++++++++++++++++++-------- arch/s390/kernel/mcount.S | 1 + arch/s390/kernel/setup.c | 2 - arch/s390/kernel/smp.c | 1 - scripts/recordmcount.c | 2 +- scripts/recordmcount.pl | 2 +- 13 files changed, 214 insertions(+), 92 deletions(-) (limited to 'arch/s390/include/asm/pgtable.h') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 3aef8afec336..785041f1dc77 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -1,25 +1,67 @@ #ifndef _ASM_S390_FTRACE_H #define _ASM_S390_FTRACE_H +#define ARCH_SUPPORTS_FTRACE_OPS 1 + +#define MCOUNT_INSN_SIZE 24 +#define MCOUNT_RETURN_FIXUP 18 + #ifndef __ASSEMBLY__ -extern void _mcount(void); +void _mcount(void); +void ftrace_caller(void); + extern char ftrace_graph_caller_end; +extern unsigned long ftrace_plt; struct dyn_arch_ftrace { }; -#define MCOUNT_ADDR ((long)_mcount) +#define MCOUNT_ADDR ((unsigned long)_mcount) +#define FTRACE_ADDR ((unsigned long)ftrace_caller) +#define KPROBE_ON_FTRACE_NOP 0 +#define KPROBE_ON_FTRACE_CALL 1 static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } -#endif /* __ASSEMBLY__ */ +struct ftrace_insn { + u16 opc; + s32 disp; +} __packed; -#define MCOUNT_INSN_SIZE 18 +static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_FUNCTION_TRACER + /* jg .+24 */ + insn->opc = 0xc0f4; + insn->disp = MCOUNT_INSN_SIZE / 2; +#endif +} -#define ARCH_SUPPORTS_FTRACE_OPS 1 +static inline int is_ftrace_nop(struct ftrace_insn *insn) +{ +#ifdef CONFIG_FUNCTION_TRACER + if (insn->disp == MCOUNT_INSN_SIZE / 2) + return 1; +#endif + return 0; +} + +static inline void ftrace_generate_call_insn(struct ftrace_insn *insn, + unsigned long ip) +{ +#ifdef CONFIG_FUNCTION_TRACER + unsigned long target; + /* brasl r0,ftrace_caller */ + target = is_module_addr((void *) ip) ? ftrace_plt : FTRACE_ADDR; + insn->opc = 0xc005; + insn->disp = (target - ip) / 2; +#endif +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 98629173ce3b..b47ad3b642cc 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -60,6 +60,7 @@ typedef u16 kprobe_opcode_t; struct arch_specific_insn { /* copy of original instruction */ kprobe_opcode_t *insn; + unsigned int is_ftrace_insn : 1; }; struct prev_kprobe { diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 6cc51fe84410..34fbcac61133 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -147,7 +147,7 @@ struct _lowcore { __u32 softirq_pending; /* 0x02ec */ __u32 percpu_offset; /* 0x02f0 */ __u32 machine_flags; /* 0x02f4 */ - __u32 ftrace_func; /* 0x02f8 */ + __u8 pad_0x02f8[0x02fc-0x02f8]; /* 0x02f8 */ __u32 spinlock_lockval; /* 0x02fc */ __u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */ @@ -297,7 +297,7 @@ struct _lowcore { __u64 percpu_offset; /* 0x0378 */ __u64 vdso_per_cpu_data; /* 0x0380 */ __u64 machine_flags; /* 0x0388 */ - __u64 ftrace_func; /* 0x0390 */ + __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ __u64 gmap; /* 0x0398 */ __u32 spinlock_lockval; /* 0x03a0 */ __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 00d460742e1e..5ef1a266936a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -133,6 +133,18 @@ extern unsigned long MODULES_END; #define MODULES_LEN (1UL << 31) #endif +static inline int is_module_addr(void *addr) +{ +#ifdef CONFIG_64BIT + BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); + if (addr < (void *)MODULES_VADDR) + return 0; + if (addr > (void *)MODULES_END) + return 0; +#endif + return 1; +} + /* * A 31 bit pagetable entry of S390 has following format: * | PFRA | | OS | diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ef279a136801..f3a78337ca86 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -156,7 +156,6 @@ int main(void) DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); - DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); BLANK(); DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cef2879edff3..302ac1f7f8e7 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -490,8 +489,5 @@ void __init startup_init(void) detect_machine_facilities(); setup_topology(); sclp_early_detect(); -#ifdef CONFIG_DYNAMIC_FTRACE - S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; -#endif lockdep_on(); } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 51d14fe5eb9a..5744d25c1d33 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -7,6 +7,7 @@ * Martin Schwidefsky */ +#include #include #include #include @@ -15,60 +16,39 @@ #include #include #include +#include #include "entry.h" -void mcount_replace_code(void); -void ftrace_disable_code(void); -void ftrace_enable_insn(void); - /* * The mcount code looks like this: * stg %r14,8(%r15) # offset 0 * larl %r1,<&counter> # offset 6 * brasl %r14,_mcount # offset 12 * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. The complete mcount block initially gets replaced - * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop - * only patch the jg/lg instruction within the block. - * Note: we do not patch the first instruction to an unconditional branch, - * since that would break kprobes/jprobes. It is easier to leave the larl - * instruction in and only modify the second instruction. + * Total length is 24 bytes. Only the first instruction will be patched + * by ftrace_make_call / ftrace_make_nop. * The enabled ftrace code block looks like this: - * larl %r0,.+24 # offset 0 - * > lg %r1,__LC_FTRACE_FUNC # offset 6 - * br %r1 # offset 12 - * brcl 0,0 # offset 14 - * brc 0,0 # offset 20 + * > brasl %r0,ftrace_caller # offset 0 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 * The ftrace function gets called with a non-standard C function call ABI * where r0 contains the return address. It is also expected that the called * function only clobbers r0 and r1, but restores r2-r15. + * For module code we can't directly jump to ftrace caller, but need a + * trampoline (ftrace_plt), which clobbers also r1. * The return point of the ftrace function has offset 24, so execution * continues behind the mcount block. - * larl %r0,.+24 # offset 0 - * > jg .+18 # offset 6 - * br %r1 # offset 12 - * brcl 0,0 # offset 14 - * brc 0,0 # offset 20 + * The disabled ftrace code block looks like this: + * > jg .+24 # offset 0 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 * The jg instruction branches to offset 24 to skip as many instructions * as possible. */ -asm( - " .align 4\n" - "mcount_replace_code:\n" - " larl %r0,0f\n" - "ftrace_disable_code:\n" - " jg 0f\n" - " br %r1\n" - " brcl 0,0\n" - " brc 0,0\n" - "0:\n" - " .align 4\n" - "ftrace_enable_insn:\n" - " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); - -#define MCOUNT_BLOCK_SIZE 24 -#define MCOUNT_INSN_OFFSET 6 -#define FTRACE_INSN_SIZE 6 + +unsigned long ftrace_plt; int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) @@ -79,24 +59,62 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Initial replacement of the whole mcount block */ - if (addr == MCOUNT_ADDR) { - if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET, - mcount_replace_code, - MCOUNT_BLOCK_SIZE)) - return -EPERM; - return 0; + struct ftrace_insn insn; + unsigned short op; + void *from, *to; + size_t size; + + ftrace_generate_nop_insn(&insn); + size = sizeof(insn); + from = &insn; + to = (void *) rec->ip; + if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op))) + return -EFAULT; + /* + * If we find a breakpoint instruction, a kprobe has been placed + * at the beginning of the function. We write the constant + * KPROBE_ON_FTRACE_NOP into the remaining four bytes of the original + * instruction so that the kprobes handler can execute a nop, if it + * reaches this breakpoint. + */ + if (op == BREAKPOINT_INSTRUCTION) { + size -= 2; + from += 2; + to += 2; + insn.disp = KPROBE_ON_FTRACE_NOP; } - if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, - MCOUNT_INSN_SIZE)) + if (probe_kernel_write(to, from, size)) return -EPERM; return 0; } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn, - FTRACE_INSN_SIZE)) + struct ftrace_insn insn; + unsigned short op; + void *from, *to; + size_t size; + + ftrace_generate_call_insn(&insn, rec->ip); + size = sizeof(insn); + from = &insn; + to = (void *) rec->ip; + if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op))) + return -EFAULT; + /* + * If we find a breakpoint instruction, a kprobe has been placed + * at the beginning of the function. We write the constant + * KPROBE_ON_FTRACE_CALL into the remaining four bytes of the original + * instruction so that the kprobes handler can execute a brasl if it + * reaches this breakpoint. + */ + if (op == BREAKPOINT_INSTRUCTION) { + size -= 2; + from += 2; + to += 2; + insn.disp = KPROBE_ON_FTRACE_CALL; + } + if (probe_kernel_write(to, from, size)) return -EPERM; return 0; } @@ -111,6 +129,24 @@ int __init ftrace_dyn_arch_init(void) return 0; } +static int __init ftrace_plt_init(void) +{ + unsigned int *ip; + + ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE); + if (!ftrace_plt) + panic("cannot allocate ftrace plt\n"); + ip = (unsigned int *) ftrace_plt; + ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ + ip[1] = 0x100a0004; + ip[2] = 0x07f10000; + ip[3] = FTRACE_ADDR >> 32; + ip[4] = FTRACE_ADDR & 0xffffffff; + set_memory_ro(ftrace_plt, 1); + return 0; +} +device_initcall(ftrace_plt_init); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addresses diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 014d4729b134..d6716c29b7f8 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -60,10 +61,21 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = { static void __kprobes copy_instruction(struct kprobe *p) { + unsigned long ip = (unsigned long) p->addr; s64 disp, new_disp; u64 addr, new_addr; - memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); + if (ftrace_location(ip) == ip) { + /* + * If kprobes patches the instruction that is morphed by + * ftrace make sure that kprobes always sees the branch + * "jg .+24" that skips the mcount block + */ + ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn); + p->ainsn.is_ftrace_insn = 1; + } else + memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); + p->opcode = p->ainsn.insn[0]; if (!probe_is_insn_relative_long(p->ainsn.insn)) return; /* @@ -85,18 +97,6 @@ static inline int is_kernel_addr(void *addr) return addr < (void *)_end; } -static inline int is_module_addr(void *addr) -{ -#ifdef CONFIG_64BIT - BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); - if (addr < (void *)MODULES_VADDR) - return 0; - if (addr > (void *)MODULES_END) - return 0; -#endif - return 1; -} - static int __kprobes s390_get_insn_slot(struct kprobe *p) { /* @@ -132,43 +132,63 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; if (s390_get_insn_slot(p)) return -ENOMEM; - p->opcode = *p->addr; copy_instruction(p); return 0; } -struct ins_replace_args { - kprobe_opcode_t *ptr; - kprobe_opcode_t opcode; +int arch_check_ftrace_location(struct kprobe *p) +{ + return 0; +} + +struct swap_insn_args { + struct kprobe *p; + unsigned int arm_kprobe : 1; }; -static int __kprobes swap_instruction(void *aref) +static int __kprobes swap_instruction(void *data) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long status = kcb->kprobe_status; - struct ins_replace_args *args = aref; - + struct swap_insn_args *args = data; + struct ftrace_insn new_insn, *insn; + struct kprobe *p = args->p; + size_t len; + + new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode; + len = sizeof(new_insn.opc); + if (!p->ainsn.is_ftrace_insn) + goto skip_ftrace; + len = sizeof(new_insn); + insn = (struct ftrace_insn *) p->addr; + if (args->arm_kprobe) { + if (is_ftrace_nop(insn)) + new_insn.disp = KPROBE_ON_FTRACE_NOP; + else + new_insn.disp = KPROBE_ON_FTRACE_CALL; + } else { + ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr); + if (insn->disp == KPROBE_ON_FTRACE_NOP) + ftrace_generate_nop_insn(&new_insn); + } +skip_ftrace: kcb->kprobe_status = KPROBE_SWAP_INST; - probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode)); + probe_kernel_write(p->addr, &new_insn, len); kcb->kprobe_status = status; return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) { - struct ins_replace_args args; + struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - args.ptr = p->addr; - args.opcode = BREAKPOINT_INSTRUCTION; stop_machine(swap_instruction, &args, NULL); } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - struct ins_replace_args args; + struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - args.ptr = p->addr; - args.opcode = p->opcode; stop_machine(swap_instruction, &args, NULL); } @@ -459,6 +479,24 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; int fixup = probe_get_fixup_type(p->ainsn.insn); + /* Check if the kprobes location is an enabled ftrace caller */ + if (p->ainsn.is_ftrace_insn) { + struct ftrace_insn *insn = (struct ftrace_insn *) p->addr; + struct ftrace_insn call_insn; + + ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr); + /* + * A kprobe on an enabled ftrace call site actually single + * stepped an unconditional branch (ftrace nop equivalent). + * Now we need to fixup things and pretend that a brasl r0,... + * was executed instead. + */ + if (insn->disp == KPROBE_ON_FTRACE_CALL) { + ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE; + regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn); + } + } + if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 4300ea374826..b6dfc5bfcb89 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -27,6 +27,7 @@ ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller lgr %r1,%r15 + aghi %r0,MCOUNT_RETURN_FIXUP aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e80d9ff9a56d..4e532c67832f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -356,7 +355,6 @@ static void __init setup_lowcore(void) lc->steal_timer = S390_lowcore.steal_timer; lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; - lc->ftrace_func = S390_lowcore.ftrace_func; restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); restart_stack += ASYNC_SIZE; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6fd9e60101f1..0b499f5cbe19 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -236,7 +236,6 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; - lc->ftrace_func = S390_lowcore.ftrace_func; lc->user_timer = lc->system_timer = lc->steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); save_access_regs((unsigned int *) lc->access_regs_save_area); diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 001facfa5b74..3d1984e59a30 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -404,7 +404,7 @@ do_file(char const *const fname) } if (w2(ghdr->e_machine) == EM_S390) { reltype = R_390_64; - mcount_adjust_64 = -8; + mcount_adjust_64 = -14; } if (w2(ghdr->e_machine) == EM_MIPS) { reltype = R_MIPS_64; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index d4b665610d67..56ea99a12ab7 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -243,7 +243,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 64) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; - $mcount_adjust = -8; + $mcount_adjust = -14; $alignment = 8; $type = ".quad"; $ld .= " -m elf64_s390"; -- cgit v1.2.3-59-g8ed1b From fcbe08d66f57c368e77ca729dd01e6b539ffb3ff Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Oct 2014 10:52:29 +0200 Subject: s390/mm: pmdp_get_and_clear_full optimization Analog to ptep_get_and_clear_full define a variant of the pmpd_get_and_clear primitive which gets the full hint from the mmu_gather struct. This allows s390 to avoid a costly instruction when destroying an address space. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 13 +++++++++++++ include/asm-generic/pgtable.h | 11 +++++++++++ mm/huge_memory.c | 3 ++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'arch/s390/include/asm/pgtable.h') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ef1a266936a..5e102422c9ab 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1651,6 +1651,19 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, return pmd; } +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL +static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp, int full) +{ + pmd_t pmd = *pmdp; + + if (!full) + pmdp_flush_lazy(mm, address, pmdp); + pmd_clear(pmdp); + return pmd; +} + #define __HAVE_ARCH_PMDP_CLEAR_FLUSH static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 752e30d63904..177d5973b132 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -103,6 +103,17 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp, + int full) +{ + return pmdp_get_and_clear(mm, address, pmdp); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7e9c15cb93a9..6a37f1b2ed1e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1400,7 +1400,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, * pgtable_trans_huge_withdraw after finishing pmdp related * operations. */ - orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd); + orig_pmd = pmdp_get_and_clear_full(tlb->mm, addr, pmd, + tlb->fullmm); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { -- cgit v1.2.3-59-g8ed1b