aboutsummaryrefslogtreecommitdiffstats
path: root/mm/util.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mm/util.c514
1 files changed, 397 insertions, 117 deletions
diff --git a/mm/util.c b/mm/util.c
index 988d11e6c17c..12984e76767e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -27,6 +27,7 @@
#include <linux/uaccess.h>
#include "internal.h"
+#include "swap.h"
/**
* kfree_const - conditionally free memory
@@ -69,7 +70,8 @@ EXPORT_SYMBOL(kstrdup);
* @s: the string to duplicate
* @gfp: the GFP mask used in the kmalloc() call when allocating memory
*
- * Note: Strings allocated by kstrdup_const should be freed by kfree_const.
+ * Note: Strings allocated by kstrdup_const should be freed by kfree_const and
+ * must not be passed to krealloc().
*
* Return: source string if it is in .rodata section otherwise
* fallback to kstrdup.
@@ -270,38 +272,6 @@ void *memdup_user_nul(const void __user *src, size_t len)
}
EXPORT_SYMBOL(memdup_user_nul);
-void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
- struct vm_area_struct *prev)
-{
- struct vm_area_struct *next;
-
- vma->vm_prev = prev;
- if (prev) {
- next = prev->vm_next;
- prev->vm_next = vma;
- } else {
- next = mm->mmap;
- mm->mmap = vma;
- }
- vma->vm_next = next;
- if (next)
- next->vm_prev = vma;
-}
-
-void __vma_unlink_list(struct mm_struct *mm, struct vm_area_struct *vma)
-{
- struct vm_area_struct *prev, *next;
-
- next = vma->vm_next;
- prev = vma->vm_prev;
- if (prev)
- prev->vm_next = next;
- else
- mm->mmap = next;
- if (next)
- next->vm_prev = prev;
-}
-
/* Check if the vma is being used as a stack by this task */
int vma_is_stack_for_current(struct vm_area_struct *vma)
{
@@ -310,6 +280,18 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
}
+/*
+ * Change backing file, only valid to use during initial VMA setup.
+ */
+void vma_set_file(struct vm_area_struct *vma, struct file *file)
+{
+ /* Changing an anonymous vma with this is illegal */
+ get_file(file);
+ swap(vma->vm_file, file);
+ fput(file);
+}
+EXPORT_SYMBOL(vma_set_file);
+
#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
#endif
@@ -330,8 +312,40 @@ unsigned long randomize_stack_top(unsigned long stack_top)
#endif
}
+/**
+ * randomize_page - Generate a random, page aligned address
+ * @start: The smallest acceptable address the caller will take.
+ * @range: The size of the area, starting at @start, within which the
+ * random address must fall.
+ *
+ * If @start + @range would overflow, @range is capped.
+ *
+ * NOTE: Historical use of randomize_range, which this replaces, presumed that
+ * @start was already page aligned. We now align it regardless.
+ *
+ * Return: A page aligned address within [start, start + range). On error,
+ * @start is returned.
+ */
+unsigned long randomize_page(unsigned long start, unsigned long range)
+{
+ if (!PAGE_ALIGNED(start)) {
+ range -= PAGE_ALIGN(start) - start;
+ start = PAGE_ALIGN(start);
+ }
+
+ if (start > ULONG_MAX - range)
+ range = ULONG_MAX - start;
+
+ range >>= PAGE_SHIFT;
+
+ if (range == 0)
+ return start;
+
+ return start + (get_random_long() % range << PAGE_SHIFT);
+}
+
#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
-unsigned long arch_randomize_brk(struct mm_struct *mm)
+unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
{
/* Is the current task 32bit ? */
if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
@@ -425,7 +439,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
* @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
*
* Assumes @task and @mm are valid (i.e. at least one reference on each), and
- * that mmap_sem is held as writer.
+ * that mmap_lock is held as writer.
*
* Return:
* * 0 on success
@@ -437,7 +451,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
unsigned long locked_vm, limit;
int ret = 0;
- lockdep_assert_held_write(&mm->mmap_sem);
+ mmap_assert_write_locked(mm);
locked_vm = mm->locked_vm;
if (inc) {
@@ -481,10 +495,10 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
if (pages == 0 || !mm)
return 0;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
ret = __account_locked_vm(mm, pages, inc, current,
capable(CAP_IPC_LOCK));
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
@@ -501,11 +515,11 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
ret = security_mmap_file(file, prot, flag);
if (!ret) {
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
- ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
- &populate, &uf);
- up_write(&mm->mmap_sem);
+ ret = do_mmap(file, addr, len, prot, flag, pgoff, &populate,
+ &uf);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(ret, populate);
@@ -536,13 +550,10 @@ EXPORT_SYMBOL(vm_mmap);
* Uses kmalloc to get the memory but if the allocation fails then falls back
* to the vmalloc allocator. Use kvfree for freeing the memory.
*
- * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported.
+ * GFP_NOWAIT and GFP_ATOMIC are not supported, neither is the __GFP_NORETRY modifier.
* __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is
* preferable to the vmalloc fallback, due to visible performance drawbacks.
*
- * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not
- * fall back to vmalloc.
- *
* Return: pointer to the allocated memory of %NULL in case of failure
*/
void *kvmalloc_node(size_t size, gfp_t flags, int node)
@@ -551,13 +562,6 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
void *ret;
/*
- * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables)
- * so the given set of flags has to be compatible.
- */
- if ((flags & GFP_KERNEL) != GFP_KERNEL)
- return kmalloc_node(size, flags, node);
-
- /*
* We want to attempt a large physically contiguous block first because
* it is less likely to fragment multiple larger blocks and therefore
* contribute to a long term fragmentation less than vmalloc fallback.
@@ -569,6 +573,9 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL))
kmalloc_flags |= __GFP_NORETRY;
+
+ /* nofail semantic is implemented by the vmalloc fallback */
+ kmalloc_flags &= ~__GFP_NOFAIL;
}
ret = kmalloc_node(size, kmalloc_flags, node);
@@ -580,8 +587,25 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
if (ret || size <= PAGE_SIZE)
return ret;
- return __vmalloc_node_flags_caller(size, node, flags,
- __builtin_return_address(0));
+ /* non-sleeping allocations are not supported by vmalloc */
+ if (!gfpflags_allow_blocking(flags))
+ return NULL;
+
+ /* Don't even allow crazy sizes */
+ if (unlikely(size > INT_MAX)) {
+ WARN_ON_ONCE(!(flags & __GFP_NOWARN));
+ return NULL;
+ }
+
+ /*
+ * kvmalloc() can always use VM_ALLOW_HUGE_VMAP,
+ * since the callers already cannot assume anything
+ * about the resulting pointer, and cannot play
+ * protection games.
+ */
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+ node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node);
@@ -604,91 +628,160 @@ void kvfree(const void *addr)
}
EXPORT_SYMBOL(kvfree);
-static inline void *__page_rmapping(struct page *page)
+/**
+ * kvfree_sensitive - Free a data object containing sensitive information.
+ * @addr: address of the data object to be freed.
+ * @len: length of the data object.
+ *
+ * Use the special memzero_explicit() function to clear the content of a
+ * kvmalloc'ed object containing sensitive data to make sure that the
+ * compiler won't optimize out the data clearing.
+ */
+void kvfree_sensitive(const void *addr, size_t len)
+{
+ if (likely(!ZERO_OR_NULL_PTR(addr))) {
+ memzero_explicit((void *)addr, len);
+ kvfree(addr);
+ }
+}
+EXPORT_SYMBOL(kvfree_sensitive);
+
+void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+{
+ void *newp;
+
+ if (oldsize >= newsize)
+ return (void *)p;
+ newp = kvmalloc(newsize, flags);
+ if (!newp)
+ return NULL;
+ memcpy(newp, p, oldsize);
+ kvfree(p);
+ return newp;
+}
+EXPORT_SYMBOL(kvrealloc);
+
+/**
+ * __vmalloc_array - allocate memory for a virtually contiguous array.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate (see kmalloc).
+ */
+void *__vmalloc_array(size_t n, size_t size, gfp_t flags)
{
- unsigned long mapping;
+ size_t bytes;
- mapping = (unsigned long)page->mapping;
- mapping &= ~PAGE_MAPPING_FLAGS;
+ if (unlikely(check_mul_overflow(n, size, &bytes)))
+ return NULL;
+ return __vmalloc(bytes, flags);
+}
+EXPORT_SYMBOL(__vmalloc_array);
- return (void *)mapping;
+/**
+ * vmalloc_array - allocate memory for a virtually contiguous array.
+ * @n: number of elements.
+ * @size: element size.
+ */
+void *vmalloc_array(size_t n, size_t size)
+{
+ return __vmalloc_array(n, size, GFP_KERNEL);
}
+EXPORT_SYMBOL(vmalloc_array);
+
+/**
+ * __vcalloc - allocate and zero memory for a virtually contiguous array.
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate (see kmalloc).
+ */
+void *__vcalloc(size_t n, size_t size, gfp_t flags)
+{
+ return __vmalloc_array(n, size, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL(__vcalloc);
+
+/**
+ * vcalloc - allocate and zero memory for a virtually contiguous array.
+ * @n: number of elements.
+ * @size: element size.
+ */
+void *vcalloc(size_t n, size_t size)
+{
+ return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO);
+}
+EXPORT_SYMBOL(vcalloc);
/* Neutral page->mapping pointer to address_space or anon_vma or other */
void *page_rmapping(struct page *page)
{
- page = compound_head(page);
- return __page_rmapping(page);
+ return folio_raw_mapping(page_folio(page));
}
-/*
- * Return true if this page is mapped into pagetables.
- * For compound page it returns true if any subpage of compound page is mapped.
+/**
+ * folio_mapped - Is this folio mapped into userspace?
+ * @folio: The folio.
+ *
+ * Return: True if any page in this folio is referenced by user page tables.
*/
-bool page_mapped(struct page *page)
+bool folio_mapped(struct folio *folio)
{
- int i;
+ long i, nr;
- if (likely(!PageCompound(page)))
- return atomic_read(&page->_mapcount) >= 0;
- page = compound_head(page);
- if (atomic_read(compound_mapcount_ptr(page)) >= 0)
+ if (!folio_test_large(folio))
+ return atomic_read(&folio->_mapcount) >= 0;
+ if (atomic_read(folio_mapcount_ptr(folio)) >= 0)
return true;
- if (PageHuge(page))
+ if (folio_test_hugetlb(folio))
return false;
- for (i = 0; i < compound_nr(page); i++) {
- if (atomic_read(&page[i]._mapcount) >= 0)
+
+ nr = folio_nr_pages(folio);
+ for (i = 0; i < nr; i++) {
+ if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0)
return true;
}
return false;
}
-EXPORT_SYMBOL(page_mapped);
+EXPORT_SYMBOL(folio_mapped);
-struct anon_vma *page_anon_vma(struct page *page)
+struct anon_vma *folio_anon_vma(struct folio *folio)
{
- unsigned long mapping;
+ unsigned long mapping = (unsigned long)folio->mapping;
- page = compound_head(page);
- mapping = (unsigned long)page->mapping;
if ((mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
return NULL;
- return __page_rmapping(page);
+ return (void *)(mapping - PAGE_MAPPING_ANON);
}
-struct address_space *page_mapping(struct page *page)
+/**
+ * folio_mapping - Find the mapping where this folio is stored.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * page belongs to. Folios in the swap cache return the swap mapping
+ * this page is stored in (which is different from the mapping for the
+ * swap file or swap device where the data is stored).
+ *
+ * You can call this for folios which aren't in the swap cache or page
+ * cache and it will return NULL.
+ */
+struct address_space *folio_mapping(struct folio *folio)
{
struct address_space *mapping;
- page = compound_head(page);
-
/* This happens if someone calls flush_dcache_page on slab page */
- if (unlikely(PageSlab(page)))
+ if (unlikely(folio_test_slab(folio)))
return NULL;
- if (unlikely(PageSwapCache(page))) {
- swp_entry_t entry;
-
- entry.val = page_private(page);
- return swap_address_space(entry);
- }
+ if (unlikely(folio_test_swapcache(folio)))
+ return swap_address_space(folio_swap_entry(folio));
- mapping = page->mapping;
- if ((unsigned long)mapping & PAGE_MAPPING_ANON)
+ mapping = folio->mapping;
+ if ((unsigned long)mapping & PAGE_MAPPING_FLAGS)
return NULL;
- return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
-}
-EXPORT_SYMBOL(page_mapping);
-
-/*
- * For file cache pages, return the address_space, otherwise return NULL
- */
-struct address_space *page_mapping_file(struct page *page)
-{
- if (unlikely(PageSwapCache(page)))
- return NULL;
- return page_mapping(page);
+ return mapping;
}
+EXPORT_SYMBOL(folio_mapping);
/* Slow path of page_mapcount() for compound pages */
int __page_mapcount(struct page *page)
@@ -710,6 +803,62 @@ int __page_mapcount(struct page *page)
}
EXPORT_SYMBOL_GPL(__page_mapcount);
+/**
+ * folio_mapcount() - Calculate the number of mappings of this folio.
+ * @folio: The folio.
+ *
+ * A large folio tracks both how many times the entire folio is mapped,
+ * and how many times each individual page in the folio is mapped.
+ * This function calculates the total number of times the folio is
+ * mapped.
+ *
+ * Return: The number of times this folio is mapped.
+ */
+int folio_mapcount(struct folio *folio)
+{
+ int i, compound, nr, ret;
+
+ if (likely(!folio_test_large(folio)))
+ return atomic_read(&folio->_mapcount) + 1;
+
+ compound = folio_entire_mapcount(folio);
+ if (folio_test_hugetlb(folio))
+ return compound;
+ ret = compound;
+ nr = folio_nr_pages(folio);
+ for (i = 0; i < nr; i++)
+ ret += atomic_read(&folio_page(folio, i)->_mapcount) + 1;
+ /* File pages has compound_mapcount included in _mapcount */
+ if (!folio_test_anon(folio))
+ return ret - compound * nr;
+ if (folio_test_double_map(folio))
+ ret -= nr;
+ return ret;
+}
+
+/**
+ * folio_copy - Copy the contents of one folio to another.
+ * @dst: Folio to copy to.
+ * @src: Folio to copy from.
+ *
+ * The bytes in the folio represented by @src are copied to @dst.
+ * Assumes the caller has validated that @dst is at least as large as @src.
+ * Can be called in atomic context for order-0 folios, but if the folio is
+ * larger, it may sleep.
+ */
+void folio_copy(struct folio *dst, struct folio *src)
+{
+ long i = 0;
+ long nr = folio_nr_pages(src);
+
+ for (;;) {
+ copy_highpage(folio_page(dst, i), folio_page(src, i));
+ if (++i == nr)
+ break;
+ cond_resched();
+ }
+}
+
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;
int sysctl_overcommit_ratio __read_mostly = 50;
unsigned long sysctl_overcommit_kbytes __read_mostly;
@@ -717,9 +866,8 @@ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
-int overcommit_ratio_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
int ret;
@@ -729,9 +877,49 @@ int overcommit_ratio_handler(struct ctl_table *table, int write,
return ret;
}
-int overcommit_kbytes_handler(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+static void sync_overcommit_as(struct work_struct *dummy)
+{
+ percpu_counter_sync(&vm_committed_as);
+}
+
+int overcommit_policy_handler(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
+{
+ struct ctl_table t;
+ int new_policy = -1;
+ int ret;
+
+ /*
+ * The deviation of sync_overcommit_as could be big with loose policy
+ * like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to
+ * strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply
+ * with the strict "NEVER", and to avoid possible race condition (even
+ * though user usually won't too frequently do the switching to policy
+ * OVERCOMMIT_NEVER), the switch is done in the following order:
+ * 1. changing the batch
+ * 2. sync percpu count on each CPU
+ * 3. switch the policy
+ */
+ if (write) {
+ t = *table;
+ t.data = &new_policy;
+ ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+ if (ret || new_policy == -1)
+ return ret;
+
+ mm_compute_batch(new_policy);
+ if (new_policy == OVERCOMMIT_NEVER)
+ schedule_on_each_cpu(sync_overcommit_as);
+ sysctl_overcommit_memory = new_policy;
+ } else {
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ }
+
+ return ret;
+}
+
+int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
+ size_t *lenp, loff_t *ppos)
{
int ret;
@@ -771,10 +959,15 @@ struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
* balancing memory across competing virtual machines that are hosted.
* Several metrics drive this policy engine including the guest reported
* memory commitment.
+ *
+ * The time cost of this is very low for small platforms, and for big
+ * platform like a 2S/36C/72T Skylake server, in worst case where
+ * vm_committed_as's spinlock is under severe contention, the time cost
+ * could be about 30~40 microseconds.
*/
unsigned long vm_memory_committed(void)
{
- return percpu_counter_read_positive(&vm_committed_as);
+ return percpu_counter_sum_positive(&vm_committed_as);
}
EXPORT_SYMBOL_GPL(vm_memory_committed);
@@ -784,7 +977,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed);
* succeed and -ENOMEM implies there is not.
*
* We currently support three overcommit policies, which are set via the
- * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting.rst
+ * vm.overcommit_memory sysctl. See Documentation/mm/overcommit-accounting.rst
*
* Strict overcommit modes added 2002 Feb 26 by Alan Cox.
* Additional code 2002 Jul 20 by Robert Love.
@@ -798,10 +991,6 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
{
long allowed;
- VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) <
- -(s64)vm_committed_as_batch * num_online_cpus(),
- "memory commitment underflow");
-
vm_acct_memory(pages);
/*
@@ -835,6 +1024,8 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
if (percpu_counter_read_positive(&vm_committed_as) < allowed)
return 0;
error:
+ pr_warn_ratelimited("%s: pid: %d, comm: %s, no enough memory for the allocation\n",
+ __func__, current->pid, current->comm);
vm_unacct_memory(pages);
return -ENOMEM;
@@ -899,7 +1090,7 @@ out:
return res;
}
-int memcmp_pages(struct page *page1, struct page *page2)
+int __weak memcmp_pages(struct page *page1, struct page *page2)
{
char *addr1, *addr2;
int ret;
@@ -911,3 +1102,92 @@ int memcmp_pages(struct page *page1, struct page *page2)
kunmap_atomic(addr1);
return ret;
}
+
+#ifdef CONFIG_PRINTK
+/**
+ * mem_dump_obj - Print available provenance information
+ * @object: object for which to find provenance information.
+ *
+ * This function uses pr_cont(), so that the caller is expected to have
+ * printed out whatever preamble is appropriate. The provenance information
+ * depends on the type of object and on how much debugging is enabled.
+ * For example, for a slab-cache object, the slab name is printed, and,
+ * if available, the return address and stack trace from the allocation
+ * and last free path of that object.
+ */
+void mem_dump_obj(void *object)
+{
+ const char *type;
+
+ if (kmem_valid_obj(object)) {
+ kmem_dump_obj(object);
+ return;
+ }
+
+ if (vmalloc_dump_obj(object))
+ return;
+
+ if (virt_addr_valid(object))
+ type = "non-slab/vmalloc memory";
+ else if (object == NULL)
+ type = "NULL pointer";
+ else if (object == ZERO_SIZE_PTR)
+ type = "zero-size pointer";
+ else
+ type = "non-paged memory";
+
+ pr_cont(" %s\n", type);
+}
+EXPORT_SYMBOL_GPL(mem_dump_obj);
+#endif
+
+/*
+ * A driver might set a page logically offline -- PageOffline() -- and
+ * turn the page inaccessible in the hypervisor; after that, access to page
+ * content can be fatal.
+ *
+ * Some special PFN walkers -- i.e., /proc/kcore -- read content of random
+ * pages after checking PageOffline(); however, these PFN walkers can race
+ * with drivers that set PageOffline().
+ *
+ * page_offline_freeze()/page_offline_thaw() allows for a subsystem to
+ * synchronize with such drivers, achieving that a page cannot be set
+ * PageOffline() while frozen.
+ *
+ * page_offline_begin()/page_offline_end() is used by drivers that care about
+ * such races when setting a page PageOffline().
+ */
+static DECLARE_RWSEM(page_offline_rwsem);
+
+void page_offline_freeze(void)
+{
+ down_read(&page_offline_rwsem);
+}
+
+void page_offline_thaw(void)
+{
+ up_read(&page_offline_rwsem);
+}
+
+void page_offline_begin(void)
+{
+ down_write(&page_offline_rwsem);
+}
+EXPORT_SYMBOL(page_offline_begin);
+
+void page_offline_end(void)
+{
+ up_write(&page_offline_rwsem);
+}
+EXPORT_SYMBOL(page_offline_end);
+
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio)
+{
+ long i, nr = folio_nr_pages(folio);
+
+ for (i = 0; i < nr; i++)
+ flush_dcache_page(folio_page(folio, i));
+}
+EXPORT_SYMBOL(flush_dcache_folio);
+#endif