aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/alternative.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/alternative.c')
-rw-r--r--arch/x86/kernel/alternative.c199
1 files changed, 151 insertions, 48 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9ec463fe96f2..15ac0d5f4b40 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,6 +23,7 @@
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <asm/insn.h>
#include <asm/io.h>
#include <asm/fixmap.h>
@@ -936,44 +937,81 @@ static void do_sync_core(void *info)
sync_core();
}
-static struct bp_patching_desc {
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
+
+struct text_poke_loc {
+ s32 rel_addr; /* addr := _stext + rel_addr */
+ s32 rel32;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
+};
+
+struct bp_patching_desc {
struct text_poke_loc *vec;
int nr_entries;
-} bp_patching;
+ atomic_t refs;
+};
+
+static struct bp_patching_desc *bp_desc;
+
+static inline struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
+{
+ struct bp_patching_desc *desc = READ_ONCE(*descp); /* rcu_dereference */
+
+ if (!desc || !atomic_inc_not_zero(&desc->refs))
+ return NULL;
+
+ return desc;
+}
+
+static inline void put_desc(struct bp_patching_desc *desc)
+{
+ smp_mb__before_atomic();
+ atomic_dec(&desc->refs);
+}
-static int patch_cmp(const void *key, const void *elt)
+static inline void *text_poke_addr(struct text_poke_loc *tp)
+{
+ return _stext + tp->rel_addr;
+}
+
+static int notrace patch_cmp(const void *key, const void *elt)
{
struct text_poke_loc *tp = (struct text_poke_loc *) elt;
- if (key < tp->addr)
+ if (key < text_poke_addr(tp))
return -1;
- if (key > tp->addr)
+ if (key > text_poke_addr(tp))
return 1;
return 0;
}
NOKPROBE_SYMBOL(patch_cmp);
-int poke_int3_handler(struct pt_regs *regs)
+int notrace poke_int3_handler(struct pt_regs *regs)
{
+ struct bp_patching_desc *desc;
struct text_poke_loc *tp;
+ int len, ret = 0;
void *ip;
+ if (user_mode(regs))
+ return 0;
+
/*
* Having observed our INT3 instruction, we now must observe
- * bp_patching.nr_entries.
+ * bp_desc:
*
- * nr_entries != 0 INT3
+ * bp_desc = desc INT3
* WMB RMB
- * write INT3 if (nr_entries)
- *
- * Idem for other elements in bp_patching.
+ * write INT3 if (desc)
*/
smp_rmb();
- if (likely(!bp_patching.nr_entries))
- return 0;
-
- if (user_mode(regs))
+ desc = try_get_desc(&bp_desc);
+ if (!desc)
return 0;
/*
@@ -984,19 +1022,20 @@ int poke_int3_handler(struct pt_regs *regs)
/*
* Skip the binary search if there is a single member in the vector.
*/
- if (unlikely(bp_patching.nr_entries > 1)) {
- tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
+ if (unlikely(desc->nr_entries > 1)) {
+ tp = bsearch(ip, desc->vec, desc->nr_entries,
sizeof(struct text_poke_loc),
patch_cmp);
if (!tp)
- return 0;
+ goto out_put;
} else {
- tp = bp_patching.vec;
- if (tp->addr != ip)
- return 0;
+ tp = desc->vec;
+ if (text_poke_addr(tp) != ip)
+ goto out_put;
}
- ip += tp->len;
+ len = text_opcode_size(tp->opcode);
+ ip += len;
switch (tp->opcode) {
case INT3_INSN_OPCODE:
@@ -1004,7 +1043,7 @@ int poke_int3_handler(struct pt_regs *regs)
* Someone poked an explicit INT3, they'll want to handle it,
* do not consume.
*/
- return 0;
+ goto out_put;
case CALL_INSN_OPCODE:
int3_emulate_call(regs, (long)ip + tp->rel32);
@@ -1019,10 +1058,18 @@ int poke_int3_handler(struct pt_regs *regs)
BUG();
}
- return 1;
+ ret = 1;
+
+out_put:
+ put_desc(desc);
+ return ret;
}
NOKPROBE_SYMBOL(poke_int3_handler);
+#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
+static struct text_poke_loc tp_vec[TP_VEC_MAX];
+static int tp_vec_nr;
+
/**
* text_poke_bp_batch() -- update instructions on live kernel on SMP
* @tp: vector of instructions to patch
@@ -1044,16 +1091,20 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* replacing opcode
* - sync cores
*/
-void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
+static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
+ struct bp_patching_desc desc = {
+ .vec = tp,
+ .nr_entries = nr_entries,
+ .refs = ATOMIC_INIT(1),
+ };
unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
int do_sync;
lockdep_assert_held(&text_mutex);
- bp_patching.vec = tp;
- bp_patching.nr_entries = nr_entries;
+ smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */
/*
* Corresponding read barrier in int3 notifier for making sure the
@@ -1065,18 +1116,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* First step: add a int3 trap to the address that will be patched.
*/
for (i = 0; i < nr_entries; i++)
- text_poke(tp[i].addr, &int3, sizeof(int3));
+ text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
- on_each_cpu(do_sync_core, NULL, 1);
+ text_poke_sync();
/*
* Second step: update all but the first byte of the patched range.
*/
for (do_sync = 0, i = 0; i < nr_entries; i++) {
- if (tp[i].len - sizeof(int3) > 0) {
- text_poke((char *)tp[i].addr + sizeof(int3),
- (const char *)tp[i].text + sizeof(int3),
- tp[i].len - sizeof(int3));
+ int len = text_opcode_size(tp[i].opcode);
+
+ if (len - INT3_INSN_SIZE > 0) {
+ text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+ (const char *)tp[i].text + INT3_INSN_SIZE,
+ len - INT3_INSN_SIZE);
do_sync++;
}
}
@@ -1087,7 +1140,7 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* not necessary and we'd be safe even without it. But
* better safe than sorry (plus there's not only Intel).
*/
- on_each_cpu(do_sync_core, NULL, 1);
+ text_poke_sync();
}
/*
@@ -1098,19 +1151,20 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
if (tp[i].text[0] == INT3_INSN_OPCODE)
continue;
- text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+ text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
do_sync++;
}
if (do_sync)
- on_each_cpu(do_sync_core, NULL, 1);
+ text_poke_sync();
/*
- * sync_core() implies an smp_mb() and orders this store against
- * the writing of the new instruction.
+ * Remove and synchronize_rcu(), except we have a very primitive
+ * refcount based completion.
*/
- bp_patching.vec = NULL;
- bp_patching.nr_entries = 0;
+ WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
+ if (!atomic_dec_and_test(&desc.refs))
+ atomic_cond_read_acquire(&desc.refs, !VAL);
}
void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
@@ -1118,11 +1172,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
{
struct insn insn;
- if (!opcode)
- opcode = (void *)tp->text;
- else
- memcpy((void *)tp->text, opcode, len);
-
+ memcpy((void *)tp->text, opcode, len);
if (!emulate)
emulate = opcode;
@@ -1132,8 +1182,7 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
BUG_ON(!insn_complete(&insn));
BUG_ON(len != insn.length);
- tp->addr = addr;
- tp->len = len;
+ tp->rel_addr = addr - (void *)_stext;
tp->opcode = insn.opcode.bytes[0];
switch (tp->opcode) {
@@ -1167,6 +1216,55 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
}
}
+/*
+ * We hard rely on the tp_vec being ordered; ensure this is so by flushing
+ * early if needed.
+ */
+static bool tp_order_fail(void *addr)
+{
+ struct text_poke_loc *tp;
+
+ if (!tp_vec_nr)
+ return false;
+
+ if (!addr) /* force */
+ return true;
+
+ tp = &tp_vec[tp_vec_nr - 1];
+ if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
+ return true;
+
+ return false;
+}
+
+static void text_poke_flush(void *addr)
+{
+ if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
+ text_poke_bp_batch(tp_vec, tp_vec_nr);
+ tp_vec_nr = 0;
+ }
+}
+
+void text_poke_finish(void)
+{
+ text_poke_flush(NULL);
+}
+
+void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
+{
+ struct text_poke_loc *tp;
+
+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, opcode, len);
+ return;
+ }
+
+ text_poke_flush(addr);
+
+ tp = &tp_vec[tp_vec_nr++];
+ text_poke_loc_init(tp, addr, opcode, len, emulate);
+}
+
/**
* text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch
@@ -1178,10 +1276,15 @@ void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
+void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
struct text_poke_loc tp;
+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, opcode, len);
+ return;
+ }
+
text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}