aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--include/linux/mm.h135
1 files changed, 102 insertions, 33 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6b10c21630f5..0e8834ac32b7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -124,10 +124,45 @@ extern int mmap_rnd_compat_bits __read_mostly;
/*
* On some architectures it is expensive to call memset() for small sizes.
- * Those architectures should provide their own implementation of "struct page"
- * zeroing by defining this macro in <asm/pgtable.h>.
+ * If an architecture decides to implement their own version of
+ * mm_zero_struct_page they should wrap the defines below in a #ifndef and
+ * define their own version of this macro in <asm/pgtable.h>
*/
-#ifndef mm_zero_struct_page
+#if BITS_PER_LONG == 64
+/* This function must be updated when the size of struct page grows above 80
+ * or reduces below 56. The idea that compiler optimizes out switch()
+ * statement, and only leaves move/store instructions. Also the compiler can
+ * combine write statments if they are both assignments and can be reordered,
+ * this can result in several of the writes here being dropped.
+ */
+#define mm_zero_struct_page(pp) __mm_zero_struct_page(pp)
+static inline void __mm_zero_struct_page(struct page *page)
+{
+ unsigned long *_pp = (void *)page;
+
+ /* Check that struct page is either 56, 64, 72, or 80 bytes */
+ BUILD_BUG_ON(sizeof(struct page) & 7);
+ BUILD_BUG_ON(sizeof(struct page) < 56);
+ BUILD_BUG_ON(sizeof(struct page) > 80);
+
+ switch (sizeof(struct page)) {
+ case 80:
+ _pp[9] = 0; /* fallthrough */
+ case 72:
+ _pp[8] = 0; /* fallthrough */
+ case 64:
+ _pp[7] = 0; /* fallthrough */
+ case 56:
+ _pp[6] = 0;
+ _pp[5] = 0;
+ _pp[4] = 0;
+ _pp[3] = 0;
+ _pp[2] = 0;
+ _pp[1] = 0;
+ _pp[0] = 0;
+ }
+}
+#else
#define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page)))
#endif
@@ -501,9 +536,6 @@ static inline void vma_set_anonymous(struct vm_area_struct *vma)
struct mmu_gather;
struct inode;
-#define page_private(page) ((page)->private)
-#define set_page_private(page, v) ((page)->private = (v))
-
#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
static inline int pmd_devmap(pmd_t pmd)
{
@@ -1007,6 +1039,30 @@ static inline void put_page(struct page *page)
__put_page(page);
}
+/**
+ * put_user_page() - release a gup-pinned page
+ * @page: pointer to page to be released
+ *
+ * Pages that were pinned via get_user_pages*() must be released via
+ * either put_user_page(), or one of the put_user_pages*() routines
+ * below. This is so that eventually, pages that are pinned via
+ * get_user_pages*() can be separately tracked and uniquely handled. In
+ * particular, interactions with RDMA and filesystems need special
+ * handling.
+ *
+ * put_user_page() and put_page() are not interchangeable, despite this early
+ * implementation that makes them look the same. put_user_page() calls must
+ * be perfectly matched up with get_user_page() calls.
+ */
+static inline void put_user_page(struct page *page)
+{
+ put_page(page);
+}
+
+void put_user_pages_dirty(struct page **pages, unsigned long npages);
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
+void put_user_pages(struct page **pages, unsigned long npages);
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
@@ -1505,21 +1561,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
-#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
-long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
- unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas);
-#else
-static inline long get_user_pages_longterm(unsigned long start,
- unsigned long nr_pages, unsigned int gup_flags,
- struct page **pages, struct vm_area_struct **vmas)
-{
- return get_user_pages(start, nr_pages, gup_flags, pages, vmas);
-}
-#endif /* CONFIG_FS_DAX */
-
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages);
+int get_user_pages_fast(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages);
/* Container for pinned pfns / pages */
struct frame_vector {
@@ -2533,6 +2576,10 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+int vm_map_pages(struct vm_area_struct *vma, struct page **pages,
+ unsigned long num);
+int vm_map_pages_zero(struct vm_area_struct *vma, struct page **pages,
+ unsigned long num);
vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn);
vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
@@ -2583,6 +2630,34 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
+#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
+
+/*
+ * NOTE on FOLL_LONGTERM:
+ *
+ * FOLL_LONGTERM indicates that the page will be held for an indefinite time
+ * period _often_ under userspace control. This is contrasted with
+ * iov_iter_get_pages() where usages which are transient.
+ *
+ * FIXME: For pages which are part of a filesystem, mappings are subject to the
+ * lifetime enforced by the filesystem and we need guarantees that longterm
+ * users like RDMA and V4L2 only establish mappings which coordinate usage with
+ * the filesystem. Ideas for this coordination include revoking the longterm
+ * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was
+ * added after the problem with filesystems was found FS DAX VMAs are
+ * specifically failed. Filesystem pages are still subject to bugs and use of
+ * FOLL_LONGTERM should be avoided on those pages.
+ *
+ * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call.
+ * Currently only get_user_pages() and get_user_pages_fast() support this flag
+ * and calls to get_user_pages_[un]locked are specifically not allowed. This
+ * is due to an incompatibility with the FS DAX check and
+ * FAULT_FLAG_ALLOW_RETRY
+ *
+ * In the CMA case: longterm pins in a CMA region would unnecessarily fragment
+ * that region. And so CMA attempts to migrate the page before pinning when
+ * FOLL_LONGTERM is specified.
+ */
static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
{
@@ -2610,37 +2685,31 @@ static inline void kernel_poison_pages(struct page *page, int numpages,
int enable) { }
#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
extern bool _debug_pagealloc_enabled;
-extern void __kernel_map_pages(struct page *page, int numpages, int enable);
static inline bool debug_pagealloc_enabled(void)
{
- return _debug_pagealloc_enabled;
+ return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) && _debug_pagealloc_enabled;
}
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
+extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
{
- if (!debug_pagealloc_enabled())
- return;
-
__kernel_map_pages(page, numpages, enable);
}
#ifdef CONFIG_HIBERNATION
extern bool kernel_page_present(struct page *page);
#endif /* CONFIG_HIBERNATION */
-#else /* CONFIG_DEBUG_PAGEALLOC */
+#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
#ifdef CONFIG_HIBERNATION
static inline bool kernel_page_present(struct page *page) { return true; }
#endif /* CONFIG_HIBERNATION */
-static inline bool debug_pagealloc_enabled(void)
-{
- return false;
-}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
+#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);