diff options
Diffstat (limited to 'arch/x86/kernel/alternative.c')
-rw-r--r-- | arch/x86/kernel/alternative.c | 51 |
1 files changed, 44 insertions, 7 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 8fd39ff74a49..c3daf0aaa0ee 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -3,9 +3,11 @@ #include <linux/module.h> #include <linux/sched.h> +#include <linux/perf_event.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/stringify.h> +#include <linux/highmem.h> #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/memory.h> @@ -15,6 +17,7 @@ #include <linux/kprobes.h> #include <linux/mmu_context.h> #include <linux/bsearch.h> +#include <linux/sync_core.h> #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> @@ -53,7 +56,7 @@ __setup("noreplace-smp", setup_noreplace_smp); #define DPRINTK(fmt, args...) \ do { \ if (debug_alternative) \ - printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ + printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args); \ } while (0) #define DUMP_BYTES(buf, len, fmt, args...) \ @@ -64,7 +67,7 @@ do { \ if (!(len)) \ break; \ \ - printk(KERN_DEBUG fmt, ##args); \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ for (j = 0; j < (len) - 1; j++) \ printk(KERN_CONT "%02hhx ", buf[j]); \ printk(KERN_CONT "%02hhx\n", buf[j]); \ @@ -872,8 +875,6 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(!pages[0] || (cross_page_boundary && !pages[1])); - local_irq_save(flags); - /* * Map the page without the global bit, as TLB flushing is done with * flush_tlb_mm_range(), which is intended for non-global PTEs. @@ -890,6 +891,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ VM_BUG_ON(!ptep); + local_irq_save(flags); + pte = mk_pte(pages[0], pgprot); set_pte_at(poking_mm, poking_addr, ptep, pte); @@ -939,8 +942,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len) */ BUG_ON(memcmp(addr, opcode, len)); - pte_unmap_unlock(ptep, ptl); local_irq_restore(flags); + pte_unmap_unlock(ptep, ptl); return addr; } @@ -1001,6 +1004,7 @@ struct text_poke_loc { s32 rel32; u8 opcode; const u8 text[POKE_MAX_OPCODE_SIZE]; + u8 old; }; struct bp_patching_desc { @@ -1044,7 +1048,7 @@ static __always_inline int patch_cmp(const void *key, const void *elt) return 0; } -int noinstr poke_int3_handler(struct pt_regs *regs) +noinstr int poke_int3_handler(struct pt_regs *regs) { struct bp_patching_desc *desc; struct text_poke_loc *tp; @@ -1168,8 +1172,10 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries /* * First step: add a int3 trap to the address that will be patched. */ - for (i = 0; i < nr_entries; i++) + for (i = 0; i < nr_entries; i++) { + tp[i].old = *(u8 *)text_poke_addr(&tp[i]); text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE); + } text_poke_sync(); @@ -1177,14 +1183,45 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries * Second step: update all but the first byte of the patched range. */ for (do_sync = 0, i = 0; i < nr_entries; i++) { + u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, }; int len = text_opcode_size(tp[i].opcode); if (len - INT3_INSN_SIZE > 0) { + memcpy(old + INT3_INSN_SIZE, + text_poke_addr(&tp[i]) + INT3_INSN_SIZE, + len - INT3_INSN_SIZE); text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE, (const char *)tp[i].text + INT3_INSN_SIZE, len - INT3_INSN_SIZE); do_sync++; } + + /* + * Emit a perf event to record the text poke, primarily to + * support Intel PT decoding which must walk the executable code + * to reconstruct the trace. The flow up to here is: + * - write INT3 byte + * - IPI-SYNC + * - write instruction tail + * At this point the actual control flow will be through the + * INT3 and handler and not hit the old or new instruction. + * Intel PT outputs FUP/TIP packets for the INT3, so the flow + * can still be decoded. Subsequently: + * - emit RECORD_TEXT_POKE with the new instruction + * - IPI-SYNC + * - write first byte + * - IPI-SYNC + * So before the text poke event timestamp, the decoder will see + * either the old instruction flow or FUP/TIP of INT3. After the + * text poke event timestamp, the decoder will see either the + * new instruction flow or FUP/TIP of INT3. Thus decoders can + * use the timestamp as the point at which to modify the + * executable code. + * The old instruction is recorded so that the event can be + * processed forwards or backwards. + */ + perf_event_text_poke(text_poke_addr(&tp[i]), old, len, + tp[i].text, len); } if (do_sync) { |