Diffstat (limited to 'mm'):
-rw-r--r--  mm/Kconfig             |   3
-rw-r--r--  mm/Makefile            |   1
-rw-r--r--  mm/as_dirty_helpers.c  | 300
-rw-r--r--  mm/memory.c            | 145
4 files changed, 36 insertions(+), 413 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 5006d0e6a5c7..f0c76ba47695 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -765,7 +765,4 @@ config GUP_BENCHMARK
config ARCH_HAS_PTE_SPECIAL
bool
-config AS_DIRTY_HELPERS
- bool
-
endmenu
diff --git a/mm/Makefile b/mm/Makefile
index f5d412bbc2f7..ac5e5ba78874 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -104,4 +104,3 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
obj-$(CONFIG_HMM) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
-obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o
diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c
deleted file mode 100644
index f600e31534fb..000000000000
--- a/mm/as_dirty_helpers.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
-#include <linux/mm_types.h>
-#include <linux/hugetlb.h>
-#include <linux/bitops.h>
-#include <linux/mmu_notifier.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-/**
- * struct apply_as - Closure structure for apply_as_range
- * @base: struct pfn_range_apply we derive from
- * @start: Address of first modified pte
- * @end: Address of last modified pte + 1
- * @total: Total number of modified ptes
- * @vma: Pointer to the struct vm_area_struct we're currently operating on
- */
-struct apply_as {
- struct pfn_range_apply base;
- unsigned long start;
- unsigned long end;
- unsigned long total;
- struct vm_area_struct *vma;
-};
-
-/**
- * apply_pt_wrprotect - Leaf pte callback to write-protect a pte
- * @pte: Pointer to the pte
- * @token: Page table token, see apply_to_pfn_range()
- * @addr: The virtual page address
- * @closure: Pointer to a struct pfn_range_apply embedded in a
- * struct apply_as
- *
- * The function write-protects a pte and records the range in
- * virtual address space of touched ptes for efficient range TLB flushes.
- *
- * Return: Always zero.
- */
-static int apply_pt_wrprotect(pte_t *pte, pgtable_t token,
- unsigned long addr,
- struct pfn_range_apply *closure)
-{
- struct apply_as *aas = container_of(closure, typeof(*aas), base);
- pte_t ptent = *pte;
-
- if (pte_write(ptent)) {
- pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
-
- ptent = pte_wrprotect(old_pte);
- ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
- aas->total++;
- aas->start = min(aas->start, addr);
- aas->end = max(aas->end, addr + PAGE_SIZE);
- }
-
- return 0;
-}
-
-/**
- * struct apply_as_clean - Closure structure for apply_as_clean
- * @base: struct apply_as we derive from
- * @bitmap_pgoff: Address_space page offset of the first bit in @bitmap
- * @bitmap: Bitmap with one bit for each page offset in the address_space range
- * covered.
- * @start: Address_space page offset of first modified pte relative
- * to @bitmap_pgoff
- * @end: Address_space page offset of last modified pte relative
- * to @bitmap_pgoff
- */
-struct apply_as_clean {
- struct apply_as base;
- pgoff_t bitmap_pgoff;
- unsigned long *bitmap;
- pgoff_t start;
- pgoff_t end;
-};
-
-/**
- * apply_pt_clean - Leaf pte callback to clean a pte
- * @pte: Pointer to the pte
- * @token: Page table token, see apply_to_pfn_range()
- * @addr: The virtual page address
- * @closure: Pointer to a struct pfn_range_apply embedded in a
- * struct apply_as_clean
- *
- * The function cleans a pte and records the range in
- * virtual address space of touched ptes for efficient TLB flushes.
- * It also records dirty ptes in a bitmap representing page offsets
- * in the address_space, as well as the first and last of the bits
- * touched.
- *
- * Return: Always zero.
- */
-static int apply_pt_clean(pte_t *pte, pgtable_t token,
- unsigned long addr,
- struct pfn_range_apply *closure)
-{
- struct apply_as *aas = container_of(closure, typeof(*aas), base);
- struct apply_as_clean *clean = container_of(aas, typeof(*clean), base);
- pte_t ptent = *pte;
-
- if (pte_dirty(ptent)) {
- pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) +
- aas->vma->vm_pgoff - clean->bitmap_pgoff;
- pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte);
-
- ptent = pte_mkclean(old_pte);
- ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent);
-
- aas->total++;
- aas->start = min(aas->start, addr);
- aas->end = max(aas->end, addr + PAGE_SIZE);
-
- __set_bit(pgoff, clean->bitmap);
- clean->start = min(clean->start, pgoff);
- clean->end = max(clean->end, pgoff + 1);
- }
-
- return 0;
-}
-
-/**
- * apply_as_range - Apply a pte callback to all PTEs pointing into a range
- * of an address_space.
- * @mapping: Pointer to the struct address_space
- * @aas: Closure structure
- * @first_index: First page offset in the address_space
- * @nr: Number of incremental page offsets to cover
- *
- * Return: Number of ptes touched. Note that this number might be larger
- * than @nr if there are overlapping vmas.
- */
-static unsigned long apply_as_range(struct address_space *mapping,
- struct apply_as *aas,
- pgoff_t first_index, pgoff_t nr)
-{
- struct vm_area_struct *vma;
- pgoff_t vba, vea, cba, cea;
- unsigned long start_addr, end_addr;
- struct mmu_notifier_range range;
-
- i_mmap_lock_read(mapping);
- vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
- first_index + nr - 1) {
- unsigned long vm_flags = READ_ONCE(vma->vm_flags);
-
- /*
- * We can only do advisory flag tests below, since we can't
- * require the vm's mmap_sem to be held to protect the flags.
- * Therefore, callers that strictly depend on specific mmap
- * flags remaining constant throughout the operation must
- * either ensure those flags are immutable for all relevant
- * vmas or must not use this function. Fixing this properly
- * would require vma::vm_flags to be protected by a separate
- * lock taken after the i_mmap_lock.
- */
-
- /* Skip non-applicable VMAs */
- if ((vm_flags & (VM_SHARED | VM_WRITE)) !=
- (VM_SHARED | VM_WRITE))
- continue;
-
- /* Warn on and skip VMAs whose flags indicate illegal usage */
- if (WARN_ON((vm_flags & (VM_HUGETLB | VM_IO)) != VM_IO))
- continue;
-
- /* Clip to the vma */
- vba = vma->vm_pgoff;
- vea = vba + vma_pages(vma);
- cba = first_index;
- cba = max(cba, vba);
- cea = first_index + nr;
- cea = min(cea, vea);
-
- /* Translate to virtual address */
- start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
- end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
- if (start_addr >= end_addr)
- continue;
-
- aas->base.mm = vma->vm_mm;
- aas->vma = vma;
- aas->start = end_addr;
- aas->end = start_addr;
-
- mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0,
- vma, vma->vm_mm, start_addr, end_addr);
- mmu_notifier_invalidate_range_start(&range);
-
- /* Needed when we only change protection? */
- flush_cache_range(vma, start_addr, end_addr);
-
- /*
- * We're not using tlb_gather_mmu() since typically
- * only a small subrange of PTEs are affected.
- */
- inc_tlb_flush_pending(vma->vm_mm);
-
- /* Should not error since aas->base.alloc == 0 */
- WARN_ON(apply_to_pfn_range(&aas->base, start_addr,
- end_addr - start_addr));
- if (aas->end > aas->start)
- flush_tlb_range(vma, aas->start, aas->end);
-
- mmu_notifier_invalidate_range_end(&range);
- dec_tlb_flush_pending(vma->vm_mm);
- }
- i_mmap_unlock_read(mapping);
-
- return aas->total;
-}
-
-/**
- * apply_as_wrprotect - Write-protect all ptes in an address_space range
- * @mapping: The address_space we want to write protect
- * @first_index: The first page offset in the range
- * @nr: Number of incremental page offsets to cover
- *
- * WARNING: This function should only be used for address spaces whose
- * vmas are marked VM_IO and that do not contain huge pages.
- * To avoid interference with COW'd pages, vmas not marked VM_SHARED are
- * simply skipped.
- *
- * Return: The number of ptes actually write-protected. Note that
- * already write-protected ptes are not counted.
- */
-unsigned long apply_as_wrprotect(struct address_space *mapping,
- pgoff_t first_index, pgoff_t nr)
-{
- struct apply_as aas = {
- .base = {
- .alloc = 0,
- .ptefn = apply_pt_wrprotect,
- },
- .total = 0,
- };
-
- return apply_as_range(mapping, &aas, first_index, nr);
-}
-EXPORT_SYMBOL_GPL(apply_as_wrprotect);
-
-/**
- * apply_as_clean - Clean all ptes in an address_space range
- * @mapping: The address_space we want to clean
- * @first_index: The first page offset in the range
- * @nr: Number of incremental page offsets to cover
- * @bitmap_pgoff: The page offset of the first bit in @bitmap
- * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
- * cover the whole range @first_index..@first_index + @nr.
- * @start: Pointer to the page offset of the first set bit in @bitmap.
- * The value is updated as new bits are set by the function.
- * @end: Pointer to the page offset one past the last set bit in @bitmap.
- * Pass *@start >= *@end to indicate that no bits are set yet. Both
- * values are updated as new bits are set by the function.
- *
- * Note: When this function returns there is no guarantee that a CPU has
- * not already dirtied new ptes. However, it will not clean any ptes not
- * reported in the bitmap.
- *
- * If a caller needs to make sure all dirty ptes are picked up and that
- * no new ones appear in the meantime, it first needs to write-protect
- * the address-space range and make sure new writers are blocked in
- * page_mkwrite() or pfn_mkwrite(). Then, after a TLB flush following the
- * write-protection, it can pick up all dirty bits.
- *
- * WARNING: This function should only be used for address spaces whose
- * vmas are marked VM_IO and that do not contain huge pages.
- * To avoid interference with COW'd pages, vmas not marked VM_SHARED are
- * simply skipped.
- *
- * Return: The number of dirty ptes actually cleaned.
- */
-unsigned long apply_as_clean(struct address_space *mapping,
- pgoff_t first_index, pgoff_t nr,
- pgoff_t bitmap_pgoff,
- unsigned long *bitmap,
- pgoff_t *start,
- pgoff_t *end)
-{
- bool none_set = (*start >= *end);
- struct apply_as_clean clean = {
- .base = {
- .base = {
- .alloc = 0,
- .ptefn = apply_pt_clean,
- },
- .total = 0,
- },
- .bitmap_pgoff = bitmap_pgoff,
- .bitmap = bitmap,
- .start = none_set ? nr : *start,
- .end = none_set ? 0 : *end,
- };
- unsigned long ret = apply_as_range(mapping, &clean.base, first_index,
- nr);
-
- *start = clean.start;
- *end = clean.end;
- return ret;
-}
-EXPORT_SYMBOL_GPL(apply_as_clean);
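
For context, the kernel-doc above describes the intended call pattern for the
helpers this patch removes: write-protect the address-space range first so new
writers are caught in page_mkwrite()/pfn_mkwrite(), then collect the dirty
bits. A minimal usage sketch follows; the function name, region handling and
bitmap management here are hypothetical and not part of this patch.

#include <linux/mm.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>

/* Hypothetical driver-side dirty-tracking flush using the removed API. */
static void example_flush_dirty(struct address_space *mapping,
				pgoff_t first, pgoff_t nr,
				unsigned long *bitmap)
{
	pgoff_t start = nr;	/* start >= end encodes "no bits set yet" */
	pgoff_t end = 0;
	pgoff_t i;

	bitmap_zero(bitmap, nr);

	/* Block further writes; apply_as_range() flushes the TLB for us. */
	apply_as_wrprotect(mapping, first, nr);

	/* Clean dirty ptes and record them; bits are relative to @first. */
	apply_as_clean(mapping, first, nr, first, bitmap, &start, &end);

	for (i = find_next_bit(bitmap, end, start); i < end;
	     i = find_next_bit(bitmap, end, i + 1)) {
		/* ... copy out the page at offset first + i ... */
	}
}
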
diff --git a/mm/memory.c b/mm/memory.c
index 462aa47f8878..ddf20bd0c317 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2032,17 +2032,18 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long
}
EXPORT_SYMBOL(vm_iomap_memory);
-static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd,
- unsigned long addr, unsigned long end)
+static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long addr, unsigned long end,
+ pte_fn_t fn, void *data)
{
pte_t *pte;
int err;
pgtable_t token;
spinlock_t *uninitialized_var(ptl);
- pte = (closure->mm == &init_mm) ?
+ pte = (mm == &init_mm) ?
pte_alloc_kernel(pmd, addr) :
- pte_alloc_map_lock(closure->mm, pmd, addr, &ptl);
+ pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
@@ -2053,109 +2054,86 @@ static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd,
token = pmd_pgtable(*pmd);
do {
- err = closure->ptefn(pte++, token, addr, closure);
+ err = fn(pte++, token, addr, data);
if (err)
break;
} while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
- if (closure->mm != &init_mm)
+ if (mm != &init_mm)
pte_unmap_unlock(pte-1, ptl);
return err;
}
-static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud,
- unsigned long addr, unsigned long end)
+static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end,
+ pte_fn_t fn, void *data)
{
pmd_t *pmd;
unsigned long next;
- int err = 0;
+ int err;
BUG_ON(pud_huge(*pud));
- pmd = pmd_alloc(closure->mm, pud, addr);
+ pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return -ENOMEM;
-
do {
next = pmd_addr_end(addr, end);
- if (!closure->alloc && pmd_none_or_clear_bad(pmd))
- continue;
- err = apply_to_pte_range(closure, pmd, addr, next);
+ err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
if (err)
break;
} while (pmd++, addr = next, addr != end);
return err;
}
-static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d,
- unsigned long addr, unsigned long end)
+static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
+ unsigned long addr, unsigned long end,
+ pte_fn_t fn, void *data)
{
pud_t *pud;
unsigned long next;
- int err = 0;
+ int err;
- pud = pud_alloc(closure->mm, p4d, addr);
+ pud = pud_alloc(mm, p4d, addr);
if (!pud)
return -ENOMEM;
-
do {
next = pud_addr_end(addr, end);
- if (!closure->alloc && pud_none_or_clear_bad(pud))
- continue;
- err = apply_to_pmd_range(closure, pud, addr, next);
+ err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
if (err)
break;
} while (pud++, addr = next, addr != end);
return err;
}
-static int apply_to_p4d_range(struct pfn_range_apply *closure, pgd_t *pgd,
- unsigned long addr, unsigned long end)
+static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end,
+ pte_fn_t fn, void *data)
{
p4d_t *p4d;
unsigned long next;
- int err = 0;
+ int err;
- p4d = p4d_alloc(closure->mm, pgd, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return -ENOMEM;
-
do {
next = p4d_addr_end(addr, end);
- if (!closure->alloc && p4d_none_or_clear_bad(p4d))
- continue;
- err = apply_to_pud_range(closure, p4d, addr, next);
+ err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
if (err)
break;
} while (p4d++, addr = next, addr != end);
return err;
}
-/**
- * apply_to_pfn_range - Scan a region of virtual memory, calling a provided
- * function on each leaf page table entry
- * @closure: Details about how to scan and what function to apply
- * @addr: Start virtual address
- * @size: Size of the region
- *
- * If @closure->alloc is set to 1, the function will fill in the page table
- * as necessary. Otherwise it will skip non-present parts.
- * Note: The caller must ensure that the range does not contain huge pages.
- * The caller must also assure that the proper mmu_notifier functions are
- * called before and after the call to apply_to_pfn_range.
- *
- * WARNING: Do not use this function unless you know exactly what you are
- * doing. It is lacking support for huge pages and transparent huge pages.
- *
- * Return: Zero on success. If the provided function returns a non-zero status,
- * the page table walk will terminate and that status will be returned.
- * If @closure->alloc is set to 1, then this function may also return memory
- * allocation errors arising from allocating page table memory.
+/*
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table.
*/
-int apply_to_pfn_range(struct pfn_range_apply *closure,
- unsigned long addr, unsigned long size)
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
{
pgd_t *pgd;
unsigned long next;
@@ -2165,65 +2143,16 @@ int apply_to_pfn_range(struct pfn_range_apply *closure,
if (WARN_ON(addr >= end))
return -EINVAL;
- pgd = pgd_offset(closure->mm, addr);
+ pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
- if (!closure->alloc && pgd_none_or_clear_bad(pgd))
- continue;
- err = apply_to_p4d_range(closure, pgd, addr, next);
+ err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
if (err)
break;
} while (pgd++, addr = next, addr != end);
return err;
}
-
-/**
- * struct page_range_apply - Closure structure for apply_to_page_range()
- * @pter: The base closure structure we derive from
- * @fn: The leaf pte function to call
- * @data: The leaf pte function closure
- */
-struct page_range_apply {
- struct pfn_range_apply pter;
- pte_fn_t fn;
- void *data;
-};
-
-/*
- * Callback wrapper to enable use of apply_to_pfn_range for
- * the apply_to_page_range interface
- */
-static int apply_to_page_range_wrapper(pte_t *pte, pgtable_t token,
- unsigned long addr,
- struct pfn_range_apply *pter)
-{
- struct page_range_apply *pra =
- container_of(pter, typeof(*pra), pter);
-
- return pra->fn(pte, token, addr, pra->data);
-}
-
-/*
- * Scan a region of virtual memory, filling in page tables as necessary
- * and calling a provided function on each leaf page table.
- *
- * WARNING: Do not use this function unless you know exactly what you are
- * doing. It is lacking support for huge pages and transparent huge pages.
- */
-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
- unsigned long size, pte_fn_t fn, void *data)
-{
- struct page_range_apply pra = {
- .pter = {.mm = mm,
- .alloc = 1,
- .ptefn = apply_to_page_range_wrapper },
- .fn = fn,
- .data = data
- };
-
- return apply_to_pfn_range(&pra.pter, addr, size);
-}
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
@@ -2309,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
ret = vmf->vma->vm_ops->page_mkwrite(vmf);
/* Restore original flags so that caller is not surprised */
vmf->flags = old_flags;
- if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
+ if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
return ret;
if (unlikely(!(ret & VM_FAULT_LOCKED))) {
lock_page(page);
@@ -2586,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
vmf->flags |= FAULT_FLAG_MKWRITE;
ret = vma->vm_ops->pfn_mkwrite(vmf);
- if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))
+ if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))
return ret;
return finish_mkwrite_fault(vmf);
}
@@ -2607,8 +2536,7 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp || (tmp &
- (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
- VM_FAULT_RETRY)))) {
+ (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
put_page(vmf->page);
return tmp;
}
@@ -3673,8 +3601,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
unlock_page(vmf->page);
tmp = do_page_mkwrite(vmf);
if (unlikely(!tmp ||
- (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE |
- VM_FAULT_RETRY)))) {
+ (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))) {
put_page(vmf->page);
return tmp;
}
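
With the helpers gone, mm/memory.c is back to the plain apply_to_page_range()
interface shown above, which always fills in missing page-table levels and
calls a pte_fn_t on every leaf pte. Below is a minimal sketch of a caller,
assuming the pte_fn_t prototype visible in the hunks above; the counting
callback and its names are illustrative, not part of this patch.

#include <linux/mm.h>

/* Illustrative pte_fn_t callback: invoked once per leaf pte by the walk. */
static int count_present_pte(pte_t *pte, pgtable_t token,
			     unsigned long addr, void *data)
{
	unsigned long *count = data;

	if (pte_present(*pte))
		(*count)++;
	return 0;	/* a non-zero return would terminate the walk */
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long addr,
					unsigned long size)
{
	unsigned long count = 0;

	/* Allocates any missing page-table levels while walking the range. */
	apply_to_page_range(mm, addr, size, count_present_pte, &count);
	return count;
}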