aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--include/linux/mm.h161
1 files changed, 121 insertions, 40 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 21f8b27bd9fd..8bbcccbc5565 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -661,6 +661,38 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma)
return vma->vm_flags & VM_ACCESS_FLAGS;
}
+static inline
+struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max)
+{
+ return mas_find(&vmi->mas, max);
+}
+
+static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
+{
+ /*
+ * Uses vma_find() to get the first VMA when the iterator starts.
+ * Calling mas_next() could skip the first entry.
+ */
+ return vma_find(vmi, ULONG_MAX);
+}
+
+static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi)
+{
+ return mas_prev(&vmi->mas, 0);
+}
+
+static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
+{
+ return vmi->mas.index;
+}
+
+#define for_each_vma(__vmi, __vma) \
+ while (((__vma) = vma_next(&(__vmi))) != NULL)
+
+/* The MM code likes to work with exclusive end addresses */
+#define for_each_vma_range(__vmi, __vma, __end) \
+ while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL)
+
#ifdef CONFIG_SHMEM
/*
* The vma_is_shmem is not inline because it is used only by slow
@@ -697,7 +729,9 @@ static inline unsigned int compound_order(struct page *page)
*/
static inline unsigned int folio_order(struct folio *folio)
{
- return compound_order(&folio->page);
+ if (!folio_test_large(folio))
+ return 0;
+ return folio->_folio_order;
}
#include <linux/huge_mm.h>
@@ -1255,6 +1289,18 @@ static inline int folio_nid(const struct folio *folio)
}
#ifdef CONFIG_NUMA_BALANCING
+/* page access time bits needs to hold at least 4 seconds */
+#define PAGE_ACCESS_TIME_MIN_BITS 12
+#if LAST_CPUPID_SHIFT < PAGE_ACCESS_TIME_MIN_BITS
+#define PAGE_ACCESS_TIME_BUCKETS \
+ (PAGE_ACCESS_TIME_MIN_BITS - LAST_CPUPID_SHIFT)
+#else
+#define PAGE_ACCESS_TIME_BUCKETS 0
+#endif
+
+#define PAGE_ACCESS_TIME_MASK \
+ (LAST_CPUPID_MASK << PAGE_ACCESS_TIME_BUCKETS)
+
static inline int cpu_pid_to_cpupid(int cpu, int pid)
{
return ((cpu & LAST__CPU_MASK) << LAST__PID_SHIFT) | (pid & LAST__PID_MASK);
@@ -1318,12 +1364,25 @@ static inline void page_cpupid_reset_last(struct page *page)
page->flags |= LAST_CPUPID_MASK << LAST_CPUPID_PGSHIFT;
}
#endif /* LAST_CPUPID_NOT_IN_PAGE_FLAGS */
+
+static inline int xchg_page_access_time(struct page *page, int time)
+{
+ int last_time;
+
+ last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS);
+ return last_time << PAGE_ACCESS_TIME_BUCKETS;
+}
#else /* !CONFIG_NUMA_BALANCING */
static inline int page_cpupid_xchg_last(struct page *page, int cpupid)
{
return page_to_nid(page); /* XXX */
}
+static inline int xchg_page_access_time(struct page *page, int time)
+{
+ return 0;
+}
+
static inline int page_cpupid_last(struct page *page)
{
return page_to_nid(page); /* XXX */
@@ -1465,6 +1524,11 @@ static inline unsigned long folio_pfn(struct folio *folio)
return page_to_pfn(&folio->page);
}
+static inline struct folio *pfn_folio(unsigned long pfn)
+{
+ return page_folio(pfn_to_page(pfn));
+}
+
static inline atomic_t *folio_pincount_ptr(struct folio *folio)
{
return &folio_page(folio, 1)->compound_pincount;
@@ -1597,7 +1661,13 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
*/
static inline long folio_nr_pages(struct folio *folio)
{
- return compound_nr(&folio->page);
+ if (!folio_test_large(folio))
+ return 1;
+#ifdef CONFIG_64BIT
+ return folio->_folio_nr_pages;
+#else
+ return 1L << folio->_folio_order;
+#endif
}
/**
@@ -1776,7 +1846,11 @@ extern void pagefault_out_of_memory(void);
*/
#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
-extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
+extern void __show_free_areas(unsigned int flags, nodemask_t *nodemask, int max_zone_idx);
+static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodemask)
+{
+ __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1);
+}
#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
@@ -1795,8 +1869,9 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size);
-void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
- unsigned long start, unsigned long end);
+void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
+ struct vm_area_struct *start_vma, unsigned long start,
+ unsigned long end);
struct mmu_notifier_range;
@@ -2495,7 +2570,6 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
unsigned long end_pfn);
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
-extern unsigned long find_min_pfn_with_active_regions(void);
#ifndef CONFIG_NUMA
static inline int early_pfn_to_nid(unsigned long pfn)
@@ -2516,7 +2590,12 @@ extern void calculate_min_free_kbytes(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
-extern void show_mem(unsigned int flags, nodemask_t *nodemask);
+
+extern void __show_mem(unsigned int flags, nodemask_t *nodemask, int max_zone_idx);
+static inline void show_mem(unsigned int flags, nodemask_t *nodemask)
+{
+ __show_mem(flags, nodemask, MAX_NR_ZONES - 1);
+}
extern long si_mem_available(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
@@ -2593,14 +2672,15 @@ extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
extern int split_vma(struct mm_struct *, struct vm_area_struct *,
unsigned long addr, int new_below);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
-extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
- struct rb_node **, struct rb_node *);
extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff,
bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);
+void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
+void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);
+
static inline int check_data_rlimit(unsigned long rlim,
unsigned long new,
unsigned long start,
@@ -2648,8 +2728,9 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr,
extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
unsigned long pgoff, unsigned long *populate, struct list_head *uf);
-extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
- struct list_head *uf, bool downgrade);
+extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm,
+ unsigned long start, size_t len, struct list_head *uf,
+ bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
@@ -2716,26 +2797,12 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add
extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
struct vm_area_struct **pprev);
-/**
- * find_vma_intersection() - Look up the first VMA which intersects the interval
- * @mm: The process address space.
- * @start_addr: The inclusive start user address.
- * @end_addr: The exclusive end user address.
- *
- * Returns: The first VMA within the provided range, %NULL otherwise. Assumes
- * start_addr < end_addr.
+/*
+ * Look up the first VMA which intersects the interval [start_addr, end_addr)
+ * NULL if none. Assume start_addr < end_addr.
*/
-static inline
struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
- unsigned long start_addr,
- unsigned long end_addr)
-{
- struct vm_area_struct *vma = find_vma(mm, start_addr);
-
- if (vma && end_addr <= vma->vm_start)
- vma = NULL;
- return vma;
-}
+ unsigned long start_addr, unsigned long end_addr);
/**
* vma_lookup() - Find a VMA at a specific address
@@ -2747,12 +2814,7 @@ struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
static inline
struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
{
- struct vm_area_struct *vma = find_vma(mm, addr);
-
- if (vma && addr < vma->vm_start)
- vma = NULL;
-
- return vma;
+ return mtree_load(&mm->mm_mt, addr);
}
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
@@ -2788,7 +2850,7 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
unsigned long vm_start, unsigned long vm_end)
{
- struct vm_area_struct *vma = find_vma(mm, vm_start);
+ struct vm_area_struct *vma = vma_lookup(mm, vm_start);
if (vma && (vma->vm_start != vm_start || vma->vm_end != vm_end))
vma = NULL;
@@ -2888,7 +2950,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
* and return without waiting upon it */
#define FOLL_NOFAULT 0x80 /* do not fault in pages */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
-#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
@@ -2975,8 +3036,8 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
* PageAnonExclusive() has to protect against concurrent GUP:
* * Ordinary GUP: Using the PT lock
* * GUP-fast and fork(): mm->write_protect_seq
- * * GUP-fast and KSM or temporary unmapping (swap, migration):
- * clear/invalidate+flush of the page table entry
+ * * GUP-fast and KSM or temporary unmapping (swap, migration): see
+ * page_try_share_anon_rmap()
*
* Must be called with the (sub)page that's actually referenced via the
* page table entry, which might not necessarily be the head page for a
@@ -2997,6 +3058,11 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page)
*/
if (!PageAnon(page))
return false;
+
+ /* Paired with a memory barrier in page_try_share_anon_rmap(). */
+ if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+ smp_rmb();
+
/*
* Note that PageKsm() pages cannot be exclusive, and consequently,
* cannot get pinned.
@@ -3004,6 +3070,21 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page)
return !PageAnonExclusive(page);
}
+/*
+ * Indicates whether GUP can follow a PROT_NONE mapped page, or whether
+ * a (NUMA hinting) fault is required.
+ */
+static inline bool gup_can_follow_protnone(unsigned int flags)
+{
+ /*
+ * FOLL_FORCE has to be able to make progress even if the VMA is
+ * inaccessible. Further, FOLL_FORCE access usually does not represent
+ * application behaviour and we should avoid triggering NUMA hinting
+ * faults.
+ */
+ return flags & FOLL_FORCE;
+}
+
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
@@ -3011,7 +3092,7 @@ extern int apply_to_existing_page_range(struct mm_struct *mm,
unsigned long address, unsigned long size,
pte_fn_t fn, void *data);
-extern void init_mem_debugging_and_hardening(void);
+extern void __init init_mem_debugging_and_hardening(void);
#ifdef CONFIG_PAGE_POISONING
extern void __kernel_poison_pages(struct page *page, int numpages);
extern void __kernel_unpoison_pages(struct page *page, int numpages);