aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events/uprobes.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events/uprobes.c')
-rw-r--r--kernel/events/uprobes.c203
1 files changed, 102 insertions, 101 deletions
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ece7e13f6e4a..d9e357b7e17c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -19,7 +19,7 @@
#include <linux/export.h>
#include <linux/rmap.h> /* anon_vma_prepare */
#include <linux/mmu_notifier.h> /* set_pte_at_notify */
-#include <linux/swap.h> /* try_to_free_swap */
+#include <linux/swap.h> /* folio_free_swap */
#include <linux/ptrace.h> /* user_enable_single_step */
#include <linux/kdebug.h> /* notifier mechanism */
#include "../../mm/internal.h" /* munlock_vma_page */
@@ -154,48 +154,41 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
struct page *old_page, struct page *new_page)
{
+ struct folio *old_folio = page_folio(old_page);
+ struct folio *new_folio;
struct mm_struct *mm = vma->vm_mm;
- struct page_vma_mapped_walk pvmw = {
- .page = compound_head(old_page),
- .vma = vma,
- .address = addr,
- };
+ DEFINE_FOLIO_VMA_WALK(pvmw, old_folio, vma, addr, 0);
int err;
struct mmu_notifier_range range;
- struct mem_cgroup *memcg;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
addr + PAGE_SIZE);
if (new_page) {
- err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
- &memcg, false);
+ new_folio = page_folio(new_page);
+ err = mem_cgroup_charge(new_folio, vma->vm_mm, GFP_KERNEL);
if (err)
return err;
}
- /* For try_to_free_swap() and munlock_vma_page() below */
- lock_page(old_page);
+ /* For folio_free_swap() below */
+ folio_lock(old_folio);
mmu_notifier_invalidate_range_start(&range);
err = -EAGAIN;
- if (!page_vma_mapped_walk(&pvmw)) {
- if (new_page)
- mem_cgroup_cancel_charge(new_page, memcg, false);
+ if (!page_vma_mapped_walk(&pvmw))
goto unlock;
- }
VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
if (new_page) {
- get_page(new_page);
- page_add_new_anon_rmap(new_page, vma, addr, false);
- mem_cgroup_commit_charge(new_page, memcg, false, false);
- lru_cache_add_active_or_unevictable(new_page, vma);
+ folio_get(new_folio);
+ page_add_new_anon_rmap(new_page, vma, addr);
+ folio_add_lru_vma(new_folio, vma);
} else
/* no new page, just dec_mm_counter for old_page */
dec_mm_counter(mm, MM_ANONPAGES);
- if (!PageAnon(old_page)) {
+ if (!folio_test_anon(old_folio)) {
dec_mm_counter(mm, mm_counter_file(old_page));
inc_mm_counter(mm, MM_ANONPAGES);
}
@@ -206,19 +199,16 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
set_pte_at_notify(mm, addr, pvmw.pte,
mk_pte(new_page, vma->vm_page_prot));
- page_remove_rmap(old_page, false);
- if (!page_mapped(old_page))
- try_to_free_swap(old_page);
+ page_remove_rmap(old_page, vma, false);
+ if (!folio_mapped(old_folio))
+ folio_free_swap(old_folio);
page_vma_mapped_walk_done(&pvmw);
-
- if (vma->vm_flags & VM_LOCKED)
- munlock_vma_page(old_page);
- put_page(old_page);
+ folio_put(old_folio);
err = 0;
unlock:
mmu_notifier_invalidate_range_end(&range);
- unlock_page(old_page);
+ folio_unlock(old_folio);
return err;
}
@@ -361,9 +351,10 @@ static bool valid_ref_ctr_vma(struct uprobe *uprobe,
static struct vm_area_struct *
find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *tmp;
- for (tmp = mm->mmap; tmp; tmp = tmp->vm_next)
+ for_each_vma(vmi, tmp)
if (valid_ref_ctr_vma(uprobe, tmp))
return tmp;
@@ -382,7 +373,7 @@ __update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
if (!vaddr || !d)
return -EINVAL;
- ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+ ret = get_user_pages_remote(mm, vaddr, 1,
FOLL_WRITE, &page, &vma, NULL);
if (unlikely(ret <= 0)) {
/*
@@ -459,11 +450,12 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
* that have fixed length instructions.
*
* uprobe_write_opcode - write the opcode at a given virtual address.
+ * @auprobe: arch specific probepoint information.
* @mm: the probed process address space.
* @vaddr: the virtual address to store the opcode.
* @opcode: opcode to be written at @vaddr.
*
- * Called with mm->mmap_sem held for write.
+ * Called with mm->mmap_lock held for write.
* Return 0 (success) or a negative errno.
*/
int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
@@ -483,7 +475,7 @@ retry:
if (is_register)
gup_flags |= FOLL_SPLIT_PMD;
/* Read the page with vaddr into memory */
- ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags,
+ ret = get_user_pages_remote(mm, vaddr, 1, gup_flags,
&old_page, &vma, NULL);
if (ret <= 0)
return ret;
@@ -563,7 +555,7 @@ put_old:
/* try collapse pmd for compound page */
if (!ret && orig_page_huge)
- collapse_pte_mapped_thp(mm, vaddr);
+ collapse_pte_mapped_thp(mm, vaddr, false);
return ret;
}
@@ -619,41 +611,56 @@ static void put_uprobe(struct uprobe *uprobe)
}
}
-static int match_uprobe(struct uprobe *l, struct uprobe *r)
+static __always_inline
+int uprobe_cmp(const struct inode *l_inode, const loff_t l_offset,
+ const struct uprobe *r)
{
- if (l->inode < r->inode)
+ if (l_inode < r->inode)
return -1;
- if (l->inode > r->inode)
+ if (l_inode > r->inode)
return 1;
- if (l->offset < r->offset)
+ if (l_offset < r->offset)
return -1;
- if (l->offset > r->offset)
+ if (l_offset > r->offset)
return 1;
return 0;
}
+#define __node_2_uprobe(node) \
+ rb_entry((node), struct uprobe, rb_node)
+
+struct __uprobe_key {
+ struct inode *inode;
+ loff_t offset;
+};
+
+static inline int __uprobe_cmp_key(const void *key, const struct rb_node *b)
+{
+ const struct __uprobe_key *a = key;
+ return uprobe_cmp(a->inode, a->offset, __node_2_uprobe(b));
+}
+
+static inline int __uprobe_cmp(struct rb_node *a, const struct rb_node *b)
+{
+ struct uprobe *u = __node_2_uprobe(a);
+ return uprobe_cmp(u->inode, u->offset, __node_2_uprobe(b));
+}
+
static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
{
- struct uprobe u = { .inode = inode, .offset = offset };
- struct rb_node *n = uprobes_tree.rb_node;
- struct uprobe *uprobe;
- int match;
+ struct __uprobe_key key = {
+ .inode = inode,
+ .offset = offset,
+ };
+ struct rb_node *node = rb_find(&key, &uprobes_tree, __uprobe_cmp_key);
- while (n) {
- uprobe = rb_entry(n, struct uprobe, rb_node);
- match = match_uprobe(&u, uprobe);
- if (!match)
- return get_uprobe(uprobe);
+ if (node)
+ return get_uprobe(__node_2_uprobe(node));
- if (match < 0)
- n = n->rb_left;
- else
- n = n->rb_right;
- }
return NULL;
}
@@ -674,32 +681,15 @@ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
{
- struct rb_node **p = &uprobes_tree.rb_node;
- struct rb_node *parent = NULL;
- struct uprobe *u;
- int match;
-
- while (*p) {
- parent = *p;
- u = rb_entry(parent, struct uprobe, rb_node);
- match = match_uprobe(uprobe, u);
- if (!match)
- return get_uprobe(u);
+ struct rb_node *node;
- if (match < 0)
- p = &parent->rb_left;
- else
- p = &parent->rb_right;
+ node = rb_find_add(&uprobe->rb_node, &uprobes_tree, __uprobe_cmp);
+ if (node)
+ return get_uprobe(__node_2_uprobe(node));
- }
-
- u = NULL;
- rb_link_node(&uprobe->rb_node, parent, p);
- rb_insert_color(&uprobe->rb_node, &uprobes_tree);
/* get access + creation ref */
refcount_set(&uprobe->ref, 2);
-
- return u;
+ return NULL;
}
/*
@@ -800,10 +790,10 @@ static int __copy_insn(struct address_space *mapping, struct file *filp,
struct page *page;
/*
* Ensure that the page that has the original instruction is populated
- * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
+ * and in page-cache. If ->read_folio == NULL it must be shmem_mapping(),
* see uprobe_register().
*/
- if (mapping->a_ops->readpage)
+ if (mapping->a_ops->read_folio)
page = read_mapping_page(mapping, offset >> PAGE_SHIFT, filp);
else
page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
@@ -867,10 +857,6 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
if (ret)
goto out;
- /* uprobe_write_opcode() assumes we don't cross page boundary */
- BUG_ON((uprobe->offset & ~PAGE_MASK) +
- UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
-
smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */
set_bit(UPROBE_COPY_INSN, &uprobe->flags);
@@ -1064,7 +1050,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
if (err && is_register)
goto free;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
vma = find_vma(mm, info->vaddr);
if (!vma || !valid_vma(vma, is_register) ||
file_inode(vma->vm_file) != uprobe->inode)
@@ -1086,7 +1072,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
}
unlock:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
free:
mmput(mm);
info = free_map_info(info);
@@ -1160,12 +1146,22 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
return -EINVAL;
/* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
- if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
+ if (!inode->i_mapping->a_ops->read_folio &&
+ !shmem_mapping(inode->i_mapping))
return -EIO;
/* Racy, just to catch the obvious mistakes */
if (offset > i_size_read(inode))
return -EINVAL;
+ /*
+ * This ensures that copy_from_page(), copy_to_page() and
+ * __update_ref_ctr() can't cross page boundary.
+ */
+ if (!IS_ALIGNED(offset, UPROBE_SWBP_INSN_SIZE))
+ return -EINVAL;
+ if (!IS_ALIGNED(ref_ctr_offset, sizeof(short)))
+ return -EINVAL;
+
retry:
uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
if (!uprobe)
@@ -1238,11 +1234,12 @@ int uprobe_apply(struct inode *inode, loff_t offset,
static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
int err = 0;
- down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ mmap_read_lock(mm);
+ for_each_vma(vmi, vma) {
unsigned long vaddr;
loff_t offset;
@@ -1258,7 +1255,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
vaddr = offset_to_vaddr(vma, uprobe->offset);
err |= remove_breakpoint(uprobe, mm, vaddr);
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return err;
}
@@ -1355,7 +1352,7 @@ static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
}
/*
- * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
+ * Called from mmap_region/vma_adjust with mm->mmap_lock acquired.
*
* Currently we ignore all errors and always return 0, the callers
* can't handle the failure anyway.
@@ -1445,7 +1442,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
struct vm_area_struct *vma;
int ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
if (mm->uprobes_state.xol_area) {
@@ -1475,7 +1472,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
/* pairs with get_xol_area() */
smp_store_release(&mm->uprobes_state.xol_area, area); /* ^^^ */
fail:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
@@ -1674,7 +1671,7 @@ void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
copy_to_page(page, vaddr, src, len);
/*
- * We probably need flush_icache_user_range() but it needs vma.
+ * We probably need flush_icache_user_page() but it needs vma.
* This should work on most of architectures by default. If
* architecture needs to do something different it can define
* its own version of the function.
@@ -1736,7 +1733,7 @@ void uprobe_free_utask(struct task_struct *t)
}
/*
- * Allocate a uprobe_task object for the task if if necessary.
+ * Allocate a uprobe_task object for the task if necessary.
* Called when the thread hits a breakpoint.
*
* Returns:
@@ -1824,7 +1821,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
t->utask->dup_xol_addr = area->vaddr;
init_task_work(&t->utask->dup_xol_work, dup_xol_work);
- task_work_add(t, &t->utask->dup_xol_work, true);
+ task_work_add(t, &t->utask->dup_xol_work, TWA_RESUME);
}
/*
@@ -1974,7 +1971,7 @@ bool uprobe_deny_signal(void)
WARN_ON_ONCE(utask->state != UTASK_SSTEP);
- if (signal_pending(t)) {
+ if (task_sigpending(t)) {
spin_lock_irq(&t->sighand->siglock);
clear_tsk_thread_flag(t, TIF_SIGPENDING);
spin_unlock_irq(&t->sighand->siglock);
@@ -1990,9 +1987,10 @@ bool uprobe_deny_signal(void)
static void mmf_recalc_uprobes(struct mm_struct *mm)
{
+ VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ for_each_vma(vmi, vma) {
if (!valid_vma(vma, false))
continue;
/*
@@ -2014,6 +2012,9 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
uprobe_opcode_t opcode;
int result;
+ if (WARN_ON_ONCE(!IS_ALIGNED(vaddr, UPROBE_SWBP_INSN_SIZE)))
+ return -EINVAL;
+
pagefault_disable();
result = __get_user(opcode, (uprobe_opcode_t __user *)vaddr);
pagefault_enable();
@@ -2027,7 +2028,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
* but we treat this as a 'remote' access since it is
* essentially a kernel access to the memory.
*/
- result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
+ result = get_user_pages_remote(mm, vaddr, 1, FOLL_FORCE, &page,
NULL, NULL);
if (result < 0)
return result;
@@ -2045,9 +2046,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
struct uprobe *uprobe = NULL;
struct vm_area_struct *vma;
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, bp_vaddr);
- if (vma && vma->vm_start <= bp_vaddr) {
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, bp_vaddr);
+ if (vma) {
if (valid_vma(vma, false)) {
struct inode *inode = file_inode(vma->vm_file);
loff_t offset = vaddr_to_offset(vma, bp_vaddr);
@@ -2063,7 +2064,7 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
mmf_recalc_uprobes(mm);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return uprobe;
}
@@ -2187,7 +2188,7 @@ static void handle_swbp(struct pt_regs *regs)
{
struct uprobe *uprobe;
unsigned long bp_vaddr;
- int uninitialized_var(is_swbp);
+ int is_swbp;
bp_vaddr = uprobe_get_swbp_addr(regs);
if (bp_vaddr == get_trampoline_vaddr())
@@ -2197,7 +2198,7 @@ static void handle_swbp(struct pt_regs *regs)
if (!uprobe) {
if (is_swbp > 0) {
/* No matching uprobe; signal SIGTRAP. */
- send_sig(SIGTRAP, current, 0);
+ force_sig(SIGTRAP);
} else {
/*
* Either we raced with uprobe_unregister() or we can't