aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/gup_benchmark.c8
-rw-r--r--mm/huge_memory.c38
-rw-r--r--mm/hugetlb.c51
-rw-r--r--mm/kasan/common.c36
-rw-r--r--mm/memcontrol.c37
-rw-r--r--mm/memory.c136
-rw-r--r--mm/memory_hotplug.c31
-rw-r--r--mm/mempolicy.c10
-rw-r--r--mm/memremap.c2
-rw-r--r--mm/migrate.c23
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/oom_kill.c2
-rw-r--r--mm/page-writeback.c10
-rw-r--r--mm/page_alloc.c61
-rw-r--r--mm/shmem.c7
-rw-r--r--mm/slab.c4
-rw-r--r--mm/slab_common.c3
-rw-r--r--mm/slub.c2
-rw-r--r--mm/sparse.c9
-rw-r--r--mm/vmalloc.c133
-rw-r--r--mm/vmscan.c2
-rw-r--r--mm/zsmalloc.c5
22 files changed, 373 insertions, 243 deletions
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 7dd602d7f8db..ad9d5b1c4473 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -26,6 +26,7 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
unsigned long i, nr_pages, addr, next;
int nr;
struct page **pages;
+ int ret = 0;
if (gup->size > ULONG_MAX)
return -EINVAL;
@@ -63,7 +64,9 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
NULL);
break;
default:
- return -1;
+ kvfree(pages);
+ ret = -EINVAL;
+ goto out;
}
if (nr <= 0)
@@ -85,7 +88,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
gup->put_delta_usec = ktime_us_delta(end_time, start_time);
kvfree(pages);
- return 0;
+out:
+ return ret;
}
static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 41a0fbddc96b..a88093213674 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -527,13 +527,13 @@ void prep_transhuge_page(struct page *page)
set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR);
}
-static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len,
+static unsigned long __thp_get_unmapped_area(struct file *filp,
+ unsigned long addr, unsigned long len,
loff_t off, unsigned long flags, unsigned long size)
{
- unsigned long addr;
loff_t off_end = off + len;
loff_t off_align = round_up(off, size);
- unsigned long len_pad;
+ unsigned long len_pad, ret;
if (off_end <= off_align || (off_end - off_align) < size)
return 0;
@@ -542,30 +542,40 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long le
if (len_pad < len || (off + len_pad) < off)
return 0;
- addr = current->mm->get_unmapped_area(filp, 0, len_pad,
+ ret = current->mm->get_unmapped_area(filp, addr, len_pad,
off >> PAGE_SHIFT, flags);
- if (IS_ERR_VALUE(addr))
+
+ /*
+ * The failure might be due to length padding. The caller will retry
+ * without the padding.
+ */
+ if (IS_ERR_VALUE(ret))
return 0;
- addr += (off - addr) & (size - 1);
- return addr;
+ /*
+ * Do not try to align to THP boundary if allocation at the address
+ * hint succeeds.
+ */
+ if (ret == addr)
+ return addr;
+
+ ret += (off - ret) & (size - 1);
+ return ret;
}
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
+ unsigned long ret;
loff_t off = (loff_t)pgoff << PAGE_SHIFT;
- if (addr)
- goto out;
if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD))
goto out;
- addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE);
- if (addr)
- return addr;
-
- out:
+ ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE);
+ if (ret)
+ return ret;
+out:
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ac65bb5e38ac..dd8737a94bec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -27,6 +27,7 @@
#include <linux/swapops.h>
#include <linux/jhash.h>
#include <linux/numa.h>
+#include <linux/llist.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -1136,7 +1137,7 @@ static inline void ClearPageHugeTemporary(struct page *page)
page[2].mapping = NULL;
}
-void free_huge_page(struct page *page)
+static void __free_huge_page(struct page *page)
{
/*
* Can't pass hstate in here because it is called from the
@@ -1199,6 +1200,54 @@ void free_huge_page(struct page *page)
spin_unlock(&hugetlb_lock);
}
+/*
+ * As free_huge_page() can be called from a non-task context, we have
+ * to defer the actual freeing in a workqueue to prevent potential
+ * hugetlb_lock deadlock.
+ *
+ * free_hpage_workfn() locklessly retrieves the linked list of pages to
+ * be freed and frees them one-by-one. As the page->mapping pointer is
+ * going to be cleared in __free_huge_page() anyway, it is reused as the
+ * llist_node structure of a lockless linked list of huge pages to be freed.
+ */
+static LLIST_HEAD(hpage_freelist);
+
+static void free_hpage_workfn(struct work_struct *work)
+{
+ struct llist_node *node;
+ struct page *page;
+
+ node = llist_del_all(&hpage_freelist);
+
+ while (node) {
+ page = container_of((struct address_space **)node,
+ struct page, mapping);
+ node = node->next;
+ __free_huge_page(page);
+ }
+}
+static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
+
+void free_huge_page(struct page *page)
+{
+ /*
+ * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
+ */
+ if (!in_task()) {
+ /*
+ * Only call schedule_work() if hpage_freelist is previously
+ * empty. Otherwise, schedule_work() had been called but the
+ * workfn hasn't retrieved the list yet.
+ */
+ if (llist_add((struct llist_node *)&page->mapping,
+ &hpage_freelist))
+ schedule_work(&free_hpage_work);
+ return;
+ }
+
+ __free_huge_page(page);
+}
+
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
INIT_LIST_HEAD(&page->lru);
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 2fa710bb6358..c15d8ae68c96 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -778,15 +778,17 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
return 0;
}
-int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
unsigned long shadow_start, shadow_end;
int ret;
- shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr);
+ if (!is_vmalloc_or_module_addr((void *)addr))
+ return 0;
+
+ shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
- shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr +
- area->size);
+ shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
shadow_end = ALIGN(shadow_end, PAGE_SIZE);
ret = apply_to_page_range(&init_mm, shadow_start,
@@ -797,10 +799,6 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
flush_cache_vmap(shadow_start, shadow_end);
- kasan_unpoison_shadow(area->addr, requested_size);
-
- area->flags |= VM_KASAN;
-
/*
* We need to be careful about inter-cpu effects here. Consider:
*
@@ -843,12 +841,23 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
* Poison the shadow for a vmalloc region. Called as part of the
* freeing process at the time the region is freed.
*/
-void kasan_poison_vmalloc(void *start, unsigned long size)
+void kasan_poison_vmalloc(const void *start, unsigned long size)
{
+ if (!is_vmalloc_or_module_addr(start))
+ return;
+
size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
}
+void kasan_unpoison_vmalloc(const void *start, unsigned long size)
+{
+ if (!is_vmalloc_or_module_addr(start))
+ return;
+
+ kasan_unpoison_shadow(start, size);
+}
+
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
void *unused)
{
@@ -948,6 +957,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
{
void *shadow_start, *shadow_end;
unsigned long region_start, region_end;
+ unsigned long size;
region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
@@ -970,9 +980,11 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
shadow_end = kasan_mem_to_shadow((void *)region_end);
if (shadow_end > shadow_start) {
- apply_to_page_range(&init_mm, (unsigned long)shadow_start,
- (unsigned long)(shadow_end - shadow_start),
- kasan_depopulate_vmalloc_pte, NULL);
+ size = shadow_end - shadow_start;
+ apply_to_existing_page_range(&init_mm,
+ (unsigned long)shadow_start,
+ size, kasan_depopulate_vmalloc_pte,
+ NULL);
flush_tlb_kernel_range((unsigned long)shadow_start,
(unsigned long)shadow_end);
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c5b5f74cfd4d..6c83cf4ed970 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3287,49 +3287,34 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
}
}
-static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only)
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
{
- unsigned long stat[MEMCG_NR_STAT];
+ unsigned long stat[MEMCG_NR_STAT] = {0};
struct mem_cgroup *mi;
int node, cpu, i;
- int min_idx, max_idx;
-
- if (slab_only) {
- min_idx = NR_SLAB_RECLAIMABLE;
- max_idx = NR_SLAB_UNRECLAIMABLE;
- } else {
- min_idx = 0;
- max_idx = MEMCG_NR_STAT;
- }
-
- for (i = min_idx; i < max_idx; i++)
- stat[i] = 0;
for_each_online_cpu(cpu)
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < MEMCG_NR_STAT; i++)
stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu);
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < MEMCG_NR_STAT; i++)
atomic_long_add(stat[i], &mi->vmstats[i]);
- if (!slab_only)
- max_idx = NR_VM_NODE_STAT_ITEMS;
-
for_each_node(node) {
struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
struct mem_cgroup_per_node *pi;
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
stat[i] = 0;
for_each_online_cpu(cpu)
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
stat[i] += per_cpu(
pn->lruvec_stat_cpu->count[i], cpu);
for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
- for (i = min_idx; i < max_idx; i++)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
atomic_long_add(stat[i], &pi->lruvec_stat[i]);
}
}
@@ -3403,13 +3388,9 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
parent = root_mem_cgroup;
/*
- * Deactivate and reparent kmem_caches. Then flush percpu
- * slab statistics to have precise values at the parent and
- * all ancestor levels. It's required to keep slab stats
- * accurate after the reparenting of kmem_caches.
+ * Deactivate and reparent kmem_caches.
*/
memcg_deactivate_kmem_caches(memcg, parent);
- memcg_flush_percpu_vmstats(memcg, true);
kmemcg_id = memcg->kmemcg_id;
BUG_ON(kmemcg_id < 0);
@@ -4913,7 +4894,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
* Flush percpu vmstats and vmevents to guarantee the value correctness
* on parent's and all ancestor levels.
*/
- memcg_flush_percpu_vmstats(memcg, false);
+ memcg_flush_percpu_vmstats(memcg);
memcg_flush_percpu_vmevents(memcg);
__mem_cgroup_free(memcg);
}
diff --git a/mm/memory.c b/mm/memory.c
index 17aadc751e5c..e24c7d22b117 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2057,26 +2057,34 @@ EXPORT_SYMBOL(vm_iomap_memory);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pte_t *pte;
- int err;
+ int err = 0;
spinlock_t *uninitialized_var(ptl);
- pte = (mm == &init_mm) ?
- pte_alloc_kernel(pmd, addr) :
- pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (!pte)
- return -ENOMEM;
+ if (create) {
+ pte = (mm == &init_mm) ?
+ pte_alloc_kernel(pmd, addr) :
+ pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+ } else {
+ pte = (mm == &init_mm) ?
+ pte_offset_kernel(pmd, addr) :
+ pte_offset_map_lock(mm, pmd, addr, &ptl);
+ }
BUG_ON(pmd_huge(*pmd));
arch_enter_lazy_mmu_mode();
do {
- err = fn(pte++, addr, data);
- if (err)
- break;
+ if (create || !pte_none(*pte)) {
+ err = fn(pte++, addr, data);
+ if (err)
+ break;
+ }
} while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
@@ -2088,77 +2096,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pmd_t *pmd;
unsigned long next;
- int err;
+ int err = 0;
BUG_ON(pud_huge(*pud));
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
+ if (create) {
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ } else {
+ pmd = pmd_offset(pud, addr);
+ }
do {
next = pmd_addr_end(addr, end);
- err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
- if (err)
- break;
+ if (create || !pmd_none_or_clear_bad(pmd)) {
+ err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (pmd++, addr = next, addr != end);
return err;
}
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pud_t *pud;
unsigned long next;
- int err;
+ int err = 0;
- pud = pud_alloc(mm, p4d, addr);
- if (!pud)
- return -ENOMEM;
+ if (create) {
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return -ENOMEM;
+ } else {
+ pud = pud_offset(p4d, addr);
+ }
do {
next = pud_addr_end(addr, end);
- err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
- if (err)
- break;
+ if (create || !pud_none_or_clear_bad(pud)) {
+ err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (pud++, addr = next, addr != end);
return err;
}
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
p4d_t *p4d;
unsigned long next;
- int err;
+ int err = 0;
- p4d = p4d_alloc(mm, pgd, addr);
- if (!p4d)
- return -ENOMEM;
+ if (create) {
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return -ENOMEM;
+ } else {
+ p4d = p4d_offset(pgd, addr);
+ }
do {
next = p4d_addr_end(addr, end);
- err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
- if (err)
- break;
+ if (create || !p4d_none_or_clear_bad(p4d)) {
+ err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (p4d++, addr = next, addr != end);
return err;
}
-/*
- * Scan a region of virtual memory, filling in page tables as necessary
- * and calling a provided function on each leaf page table.
- */
-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
- unsigned long size, pte_fn_t fn, void *data)
+static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn,
+ void *data, bool create)
{
pgd_t *pgd;
unsigned long next;
unsigned long end = addr + size;
- int err;
+ int err = 0;
if (WARN_ON(addr >= end))
return -EINVAL;
@@ -2166,16 +2192,42 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
- err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
+ if (!create && pgd_none_or_clear_bad(pgd))
+ continue;
+ err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create);
if (err)
break;
} while (pgd++, addr = next, addr != end);
return err;
}
+
+/*
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table.
+ */
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ return __apply_to_page_range(mm, addr, size, fn, data, true);
+}
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
+ * Scan a region of virtual memory, calling a provided function on
+ * each leaf page table where it exists.
+ *
+ * Unlike apply_to_page_range, this does _not_ fill in page tables
+ * where they are absent.
+ */
+int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ return __apply_to_page_range(mm, addr, size, fn, data, false);
+}
+EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
+
+/*
* handle_pte_fault chooses page fault handler according to an entry which was
* read non-atomically. Before making any commitment, on those architectures
* or configurations (e.g. i386 with PAE) which might give a mix of unmatched
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 55ac23ef11c1..a91a072f2b2c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -483,8 +483,9 @@ static void update_pgdat_span(struct pglist_data *pgdat)
pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
}
-static void __remove_zone(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages)
+void __ref remove_pfn_range_from_zone(struct zone *zone,
+ unsigned long start_pfn,
+ unsigned long nr_pages)
{
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
@@ -499,28 +500,30 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
return;
#endif
+ clear_zone_contiguous(zone);
+
pgdat_resize_lock(zone->zone_pgdat, &flags);
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
update_pgdat_span(pgdat);
pgdat_resize_unlock(zone->zone_pgdat, &flags);
+
+ set_zone_contiguous(zone);
}
-static void __remove_section(struct zone *zone, unsigned long pfn,
- unsigned long nr_pages, unsigned long map_offset,
- struct vmem_altmap *altmap)
+static void __remove_section(unsigned long pfn, unsigned long nr_pages,
+ unsigned long map_offset,
+ struct vmem_altmap *altmap)
{
struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
if (WARN_ON_ONCE(!valid_section(ms)))
return;
- __remove_zone(zone, pfn, nr_pages);
sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
}
/**
- * __remove_pages() - remove sections of pages from a zone
- * @zone: zone from which pages need to be removed
+ * __remove_pages() - remove sections of pages
* @pfn: starting pageframe (must be aligned to start of a section)
* @nr_pages: number of pages to remove (must be multiple of section size)
* @altmap: alternative device page map or %NULL if default memmap is used
@@ -530,16 +533,14 @@ static void __remove_section(struct zone *zone, unsigned long pfn,
* sure that pages are marked reserved and zones are adjust properly by
* calling offline_pages().
*/
-void __remove_pages(struct zone *zone, unsigned long pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap)
+void __remove_pages(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap)
{
unsigned long map_offset = 0;
unsigned long nr, start_sec, end_sec;
map_offset = vmem_altmap_offset(altmap);
- clear_zone_contiguous(zone);
-
if (check_pfn_span(pfn, nr_pages, "remove"))
return;
@@ -551,13 +552,11 @@ void __remove_pages(struct zone *zone, unsigned long pfn,
cond_resched();
pfns = min(nr_pages, PAGES_PER_SECTION
- (pfn & ~PAGE_SECTION_MASK));
- __remove_section(zone, pfn, pfns, map_offset, altmap);
+ __remove_section(pfn, pfns, map_offset, altmap);
pfn += pfns;
nr_pages -= pfns;
map_offset = 0;
}
-
- set_zone_contiguous(zone);
}
int set_online_page_callback(online_page_callback_t callback)
@@ -869,6 +868,7 @@ failed_addition:
(unsigned long long) pfn << PAGE_SHIFT,
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
+ remove_pfn_range_from_zone(zone, pfn, nr_pages);
mem_hotplug_done();
return ret;
}
@@ -1628,6 +1628,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
writeback_set_ratelimit();
memory_notify(MEM_OFFLINE, &arg);
+ remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
mem_hotplug_done();
return 0;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 067cf7d3daf5..b2920ae87a61 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2148,18 +2148,22 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
+ /*
+ * First, try to allocate THP only on local node, but
+ * don't reclaim unnecessarily, just compact.
+ */
page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
+ gfp | __GFP_THISNODE | __GFP_NORETRY, order);
/*
* If hugepage allocations are configured to always
* synchronous compact or the vma has been madvised
* to prefer hugepage backing, retry allowing remote
- * memory as well.
+ * memory with both reclaim and compact as well.
*/
if (!page && (gfp & __GFP_DIRECT_RECLAIM))
page = __alloc_pages_node(hpage_node,
- gfp | __GFP_NORETRY, order);
+ gfp, order);
goto out;
}
diff --git a/mm/memremap.c b/mm/memremap.c
index 03ccbdfeb697..c51c6bd2fe34 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap *pgmap)
mem_hotplug_begin();
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
- __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
+ __remove_pages(PHYS_PFN(res->start),
PHYS_PFN(resource_size(res)), NULL);
} else {
arch_remove_memory(nid, res->start, resource_size(res),
diff --git a/mm/migrate.c b/mm/migrate.c
index eae1565285e3..86873b6f38a7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1512,9 +1512,11 @@ static int do_move_pages_to_node(struct mm_struct *mm,
/*
* Resolves the given address to a struct page, isolates it from the LRU and
* puts it to the given pagelist.
- * Returns -errno if the page cannot be found/isolated or 0 when it has been
- * queued or the page doesn't need to be migrated because it is already on
- * the target node
+ * Returns:
+ * errno - if the page cannot be found/isolated
+ * 0 - when it doesn't have to be migrated because it is already on the
+ * target node
+ * 1 - when it has been queued
*/
static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
int node, struct list_head *pagelist, bool migrate_all)
@@ -1553,7 +1555,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
if (PageHuge(page)) {
if (PageHead(page)) {
isolate_huge_page(page, pagelist);
- err = 0;
+ err = 1;
}
} else {
struct page *head;
@@ -1563,7 +1565,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
if (err)
goto out_putpage;
- err = 0;
+ err = 1;
list_add_tail(&head->lru, pagelist);
mod_node_page_state(page_pgdat(head),
NR_ISOLATED_ANON + page_is_file_cache(head),
@@ -1640,8 +1642,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
*/
err = add_page_for_migration(mm, addr, current_node,
&pagelist, flags & MPOL_MF_MOVE_ALL);
- if (!err)
+
+ if (!err) {
+ /* The page is already on the target node */
+ err = store_status(status, i, current_node, 1);
+ if (err)
+ goto out_flush;
continue;
+ } else if (err > 0) {
+ /* The page is successfully queued for migration */
+ continue;
+ }
err = store_status(status, i, err, 1);
if (err)
diff --git a/mm/mmap.c b/mm/mmap.c
index 9c648524e4dc..71e4ffc83bcd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -90,12 +90,6 @@ static void unmap_region(struct mm_struct *mm,
* MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
* w: (no) no w: (no) no w: (copy) copy w: (no) no
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
- *
- * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
- * MAP_PRIVATE:
- * r: (no) no
- * w: (no) no
- * x: (yes) yes
*/
pgprot_t protection_map[16] __ro_after_init = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 71e3acea7817..d58c481b3df8 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -890,7 +890,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
K(get_mm_counter(mm, MM_FILEPAGES)),
K(get_mm_counter(mm, MM_SHMEMPAGES)),
from_kuid(&init_user_ns, task_uid(victim)),
- mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
+ mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
task_unlock(victim);
/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 50055d2e4ea8..2caf780a42e7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -201,11 +201,11 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
if (this_bw < tot_bw) {
if (min) {
min *= this_bw;
- do_div(min, tot_bw);
+ min = div64_ul(min, tot_bw);
}
if (max < 100) {
max *= this_bw;
- do_div(max, tot_bw);
+ max = div64_ul(max, tot_bw);
}
}
@@ -766,7 +766,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
struct wb_domain *dom = dtc_dom(dtc);
unsigned long thresh = dtc->thresh;
u64 wb_thresh;
- long numerator, denominator;
+ unsigned long numerator, denominator;
unsigned long wb_min_ratio, wb_max_ratio;
/*
@@ -777,7 +777,7 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
wb_thresh *= numerator;
- do_div(wb_thresh, denominator);
+ wb_thresh = div64_ul(wb_thresh, denominator);
wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
@@ -1102,7 +1102,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
bw = written - min(written, wb->written_stamp);
bw *= HZ;
if (unlikely(elapsed > period)) {
- do_div(bw, elapsed);
+ bw = div64_ul(bw, elapsed);
avg = bw;
goto out;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4785a8a2040e..d047bf7d8fd4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -694,34 +694,27 @@ void prep_compound_page(struct page *page, unsigned int order)
#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned int _debug_guardpage_minorder;
-#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
-DEFINE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
-#else
+bool _debug_pagealloc_enabled_early __read_mostly
+ = IS_ENABLED(CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT);
+EXPORT_SYMBOL(_debug_pagealloc_enabled_early);
DEFINE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
-#endif
EXPORT_SYMBOL(_debug_pagealloc_enabled);
DEFINE_STATIC_KEY_FALSE(_debug_guardpage_enabled);
static int __init early_debug_pagealloc(char *buf)
{
- bool enable = false;
-
- if (kstrtobool(buf, &enable))
- return -EINVAL;
-
- if (enable)
- static_branch_enable(&_debug_pagealloc_enabled);
-
- return 0;
+ return kstrtobool(buf, &_debug_pagealloc_enabled_early);
}
early_param("debug_pagealloc", early_debug_pagealloc);
-static void init_debug_guardpage(void)
+void init_debug_pagealloc(void)
{
if (!debug_pagealloc_enabled())
return;
+ static_branch_enable(&_debug_pagealloc_enabled);
+
if (!debug_guardpage_minorder())
return;
@@ -1186,7 +1179,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
*/
arch_free_page(page, order);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
kernel_map_pages(page, 1 << order, 0);
kasan_free_nondeferred_pages(page, order);
@@ -1207,7 +1200,7 @@ static bool free_pcp_prepare(struct page *page)
static bool bulkfree_pcp_prepare(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return free_pages_check(page);
else
return false;
@@ -1221,7 +1214,7 @@ static bool bulkfree_pcp_prepare(struct page *page)
*/
static bool free_pcp_prepare(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return free_pages_prepare(page, 0, true);
else
return free_pages_prepare(page, 0, false);
@@ -1973,10 +1966,6 @@ void __init page_alloc_init_late(void)
for_each_populated_zone(zone)
set_zone_contiguous(zone);
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- init_debug_guardpage();
-#endif
}
#ifdef CONFIG_CMA
@@ -2106,7 +2095,7 @@ static inline bool free_pages_prezeroed(void)
*/
static inline bool check_pcp_refill(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return check_new_page(page);
else
return false;
@@ -2128,7 +2117,7 @@ static inline bool check_pcp_refill(struct page *page)
}
static inline bool check_new_pcp(struct page *page)
{
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
return check_new_page(page);
else
return false;
@@ -2155,7 +2144,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
set_page_refcounted(page);
arch_alloc_page(page, order);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
kernel_map_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);
kernel_poison_pages(page, 1 << order, 1);
@@ -4476,8 +4465,11 @@ retry_cpuset:
if (page)
goto got_pg;
- if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
- !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
+ /*
+ * Checks for costly allocations with __GFP_NORETRY, which
+ * includes some THP page fault allocations
+ */
+ if (costly_order && (gfp_mask & __GFP_NORETRY)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
@@ -4498,23 +4490,6 @@ retry_cpuset:
if (compact_result == COMPACT_SKIPPED ||
compact_result == COMPACT_DEFERRED)
goto nopage;
- }
-
- /*
- * Checks for costly allocations with __GFP_NORETRY, which
- * includes THP page fault allocations
- */
- if (costly_order && (gfp_mask & __GFP_NORETRY)) {
- /*
- * If compaction is deferred for high-order allocations,
- * it is because sync compaction recently failed. If
- * this is the case and the caller requested a THP
- * allocation, we do not want to heavily disrupt the
- * system, so we fail the allocation instead of entering
- * direct reclaim.
- */
- if (compact_result == COMPACT_DEFERRED)
- goto nopage;
/*
* Looks like reclaim/compaction is worth trying, but
diff --git a/mm/shmem.c b/mm/shmem.c
index 165fa6332993..8793e8cc1a48 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2107,9 +2107,10 @@ unsigned long shmem_get_unmapped_area(struct file *file,
/*
* Our priority is to support MAP_SHARED mapped hugely;
* and support MAP_PRIVATE mapped hugely too, until it is COWed.
- * But if caller specified an address hint, respect that as before.
+ * But if caller specified an address hint and we allocated area there
+ * successfully, respect that as before.
*/
- if (uaddr)
+ if (uaddr == addr)
return addr;
if (shmem_huge != SHMEM_HUGE_FORCE) {
@@ -2143,7 +2144,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
if (inflated_len < len)
return addr;
- inflated_addr = get_area(NULL, 0, inflated_len, 0, flags);
+ inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags);
if (IS_ERR_VALUE(inflated_addr))
return addr;
if (inflated_addr & ~PAGE_MASK)
diff --git a/mm/slab.c b/mm/slab.c
index f1e1840af533..a89633603b2d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1416,7 +1416,7 @@ static void kmem_rcu_free(struct rcu_head *head)
#if DEBUG
static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
{
- if (debug_pagealloc_enabled() && OFF_SLAB(cachep) &&
+ if (debug_pagealloc_enabled_static() && OFF_SLAB(cachep) &&
(cachep->size % PAGE_SIZE) == 0)
return true;
@@ -2008,7 +2008,7 @@ int __kmem_cache_create(struct kmem_cache *cachep, slab_flags_t flags)
* to check size >= 256. It guarantees that all necessary small
* sized slab is initialized in current slab initialization sequence.
*/
- if (debug_pagealloc_enabled() && (flags & SLAB_POISON) &&
+ if (debug_pagealloc_enabled_static() && (flags & SLAB_POISON) &&
size >= 256 && cachep->object_size > cache_line_size()) {
if (size < PAGE_SIZE || size % PAGE_SIZE == 0) {
size_t tmp_size = ALIGN(size, PAGE_SIZE);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f0ab6d4ceb4c..0d95ddea13b0 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -903,7 +903,8 @@ static void flush_memcg_workqueue(struct kmem_cache *s)
* deactivates the memcg kmem_caches through workqueue. Make sure all
* previous workitems on workqueue are processed.
*/
- flush_workqueue(memcg_kmem_cache_wq);
+ if (likely(memcg_kmem_cache_wq))
+ flush_workqueue(memcg_kmem_cache_wq);
/*
* If we're racing with children kmem_cache deactivation, it might
diff --git a/mm/slub.c b/mm/slub.c
index d11389710b12..8eafccf75940 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -288,7 +288,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
unsigned long freepointer_addr;
void *p;
- if (!debug_pagealloc_enabled())
+ if (!debug_pagealloc_enabled_static())
return get_freepointer(s, object);
freepointer_addr = (unsigned long)object + s->offset;
diff --git a/mm/sparse.c b/mm/sparse.c
index b20ab7cdac86..3822ecbd8a1f 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -777,7 +777,14 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
unsigned long section_nr = pfn_to_section_nr(pfn);
- if (!section_is_early) {
+ /*
+ * When removing an early section, the usage map is kept (as the
+ * usage maps of other sections fall into the same page). It
+ * will be re-used when re-adding the section - which is then no
+ * longer an early section. If the usage map is PageReserved, it
+ * was allocated during boot.
+ */
+ if (!PageReserved(virt_to_page(ms->usage))) {
kfree(ms->usage);
ms->usage = NULL;
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4d3b3d60d893..b29ad17edcf5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1062,6 +1062,26 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
}
/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+static void free_vmap_area(struct vmap_area *va)
+{
+ /*
+ * Remove from the busy tree/list.
+ */
+ spin_lock(&vmap_area_lock);
+ unlink_va(va, &vmap_area_root);
+ spin_unlock(&vmap_area_lock);
+
+ /*
+ * Insert/Merge it back to the free tree/list.
+ */
+ spin_lock(&free_vmap_area_lock);
+ merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
+ spin_unlock(&free_vmap_area_lock);
+}
+
+/*
* Allocate a region of KVA of the specified size and alignment, within the
* vstart and vend.
*/
@@ -1073,6 +1093,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
struct vmap_area *va, *pva;
unsigned long addr;
int purged = 0;
+ int ret;
BUG_ON(!size);
BUG_ON(offset_in_page(size));
@@ -1139,6 +1160,7 @@ retry:
va->va_end = addr + size;
va->vm = NULL;
+
spin_lock(&vmap_area_lock);
insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
spin_unlock(&vmap_area_lock);
@@ -1147,6 +1169,12 @@ retry:
BUG_ON(va->va_start < vstart);
BUG_ON(va->va_end > vend);
+ ret = kasan_populate_vmalloc(addr, size);
+ if (ret) {
+ free_vmap_area(va);
+ return ERR_PTR(ret);
+ }
+
return va;
overflow:
@@ -1186,26 +1214,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
/*
- * Free a region of KVA allocated by alloc_vmap_area
- */
-static void free_vmap_area(struct vmap_area *va)
-{
- /*
- * Remove from the busy tree/list.
- */
- spin_lock(&vmap_area_lock);
- unlink_va(va, &vmap_area_root);
- spin_unlock(&vmap_area_lock);
-
- /*
- * Insert/Merge it back to the free tree/list.
- */
- spin_lock(&free_vmap_area_lock);
- merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
- spin_unlock(&free_vmap_area_lock);
-}
-
-/*
* Clear the pagetable entries of a given vmap_area
*/
static void unmap_vmap_area(struct vmap_area *va)
@@ -1375,7 +1383,7 @@ static void free_unmap_vmap_area(struct vmap_area *va)
{
flush_cache_vunmap(va->va_start, va->va_end);
unmap_vmap_area(va);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
flush_tlb_kernel_range(va->va_start, va->va_end);
free_vmap_area_noflush(va);
@@ -1673,7 +1681,7 @@ static void vb_free(const void *addr, unsigned long size)
vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
- if (debug_pagealloc_enabled())
+ if (debug_pagealloc_enabled_static())
flush_tlb_kernel_range((unsigned long)addr,
(unsigned long)addr + size);
@@ -1771,6 +1779,8 @@ void vm_unmap_ram(const void *mem, unsigned int count)
BUG_ON(addr > VMALLOC_END);
BUG_ON(!PAGE_ALIGNED(addr));
+ kasan_poison_vmalloc(mem, size);
+
if (likely(count <= VMAP_MAX_ALLOC)) {
debug_check_no_locks_freed(mem, size);
vb_free(mem, size);
@@ -1821,6 +1831,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
addr = va->va_start;
mem = (void *)addr;
}
+
+ kasan_unpoison_vmalloc(mem, size);
+
if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
vm_unmap_ram(mem, count);
return NULL;
@@ -2075,6 +2088,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
{
struct vmap_area *va;
struct vm_struct *area;
+ unsigned long requested_size = size;
BUG_ON(in_interrupt());
size = PAGE_ALIGN(size);
@@ -2098,23 +2112,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return NULL;
}
- setup_vmalloc_vm(area, va, flags, caller);
+ kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
- /*
- * For KASAN, if we are in vmalloc space, we need to cover the shadow
- * area with real memory. If we come here through VM_ALLOC, this is
- * done by a higher level function that has access to the true size,
- * which might not be a full page.
- *
- * We assume module space comes via VM_ALLOC path.
- */
- if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) {
- if (kasan_populate_vmalloc(area->size, area)) {
- unmap_vmap_area(va);
- kfree(area);
- return NULL;
- }
- }
+ setup_vmalloc_vm(area, va, flags, caller);
return area;
}
@@ -2293,8 +2293,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
- if (area->flags & VM_KASAN)
- kasan_poison_vmalloc(area->addr, area->size);
+ kasan_poison_vmalloc(area->addr, area->size);
vm_remove_mappings(area, deallocate_pages);
@@ -2539,7 +2538,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!size || (size >> PAGE_SHIFT) > totalram_pages())
goto fail;
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
+ area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
@@ -2548,11 +2547,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!addr)
return NULL;
- if (is_vmalloc_or_module_addr(area->addr)) {
- if (kasan_populate_vmalloc(real_size, area))
- return NULL;
- }
-
/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
@@ -3294,7 +3288,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
struct vmap_area **vas, *va;
struct vm_struct **vms;
int area, area2, last_area, term_area;
- unsigned long base, start, size, end, last_end;
+ unsigned long base, start, size, end, last_end, orig_start, orig_end;
bool purged = false;
enum fit_type type;
@@ -3424,6 +3418,15 @@ retry:
spin_unlock(&free_vmap_area_lock);
+ /* populate the kasan shadow space */
+ for (area = 0; area < nr_vms; area++) {
+ if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
+ goto err_free_shadow;
+
+ kasan_unpoison_vmalloc((void *)vas[area]->va_start,
+ sizes[area]);
+ }
+
/* insert all vm's */
spin_lock(&vmap_area_lock);
for (area = 0; area < nr_vms; area++) {
@@ -3434,12 +3437,6 @@ retry:
}
spin_unlock(&vmap_area_lock);
- /* populate the shadow space outside of the lock */
- for (area = 0; area < nr_vms; area++) {
- /* assume success here */
- kasan_populate_vmalloc(sizes[area], vms[area]);
- }
-
kfree(vas);
return vms;
@@ -3451,8 +3448,12 @@ recovery:
* and when pcpu_get_vm_areas() is success.
*/
while (area--) {
- merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
- &free_vmap_area_list);
+ orig_start = vas[area]->va_start;
+ orig_end = vas[area]->va_end;
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
+ &free_vmap_area_list);
+ kasan_release_vmalloc(orig_start, orig_end,
+ va->va_start, va->va_end);
vas[area] = NULL;
}
@@ -3487,6 +3488,28 @@ err_free2:
kfree(vas);
kfree(vms);
return NULL;
+
+err_free_shadow:
+ spin_lock(&free_vmap_area_lock);
+ /*
+ * We release all the vmalloc shadows, even the ones for regions that
+ * hadn't been successfully added. This relies on kasan_release_vmalloc
+ * being able to tolerate this case.
+ */
+ for (area = 0; area < nr_vms; area++) {
+ orig_start = vas[area]->va_start;
+ orig_end = vas[area]->va_end;
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
+ &free_vmap_area_list);
+ kasan_release_vmalloc(orig_start, orig_end,
+ va->va_start, va->va_end);
+ vas[area] = NULL;
+ kfree(vms[area]);
+ }
+ spin_unlock(&free_vmap_area_lock);
+ kfree(vas);
+ kfree(vms);
+ return NULL;
}
/**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74e8edce83ca..572fb17c6273 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -387,7 +387,7 @@ void register_shrinker_prepared(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
idr_replace(&shrinker_idr, shrinker, shrinker->id);
#endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 2b2b9aae8a3c..22d17ecfe7df 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
zs_pool_dec_isolated(pool);
}
+ if (page_zone(newpage) != page_zone(page)) {
+ dec_zone_page_state(page, NR_ZSPAGES);
+ inc_zone_page_state(newpage, NR_ZSPAGES);
+ }
+
reset_page(page);
put_page(page);
page = newpage;