aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig.debug2
-rw-r--r--mm/debug.c6
-rw-r--r--mm/filemap.c114
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/khugepaged.c25
-rw-r--r--mm/ksm.c3
-rw-r--r--mm/memcontrol.c31
-rw-r--r--mm/memory.c14
-rw-r--r--mm/memory_hotplug.c8
-rw-r--r--mm/mmap.c13
-rw-r--r--mm/page-writeback.c26
-rw-r--r--mm/page_io.c3
-rw-r--r--mm/shmem.c5
-rw-r--r--mm/slab.c114
-rw-r--r--mm/slub.c65
-rw-r--r--mm/swapfile.c1
-rw-r--r--mm/usercopy.c64
-rw-r--r--mm/vmscan.c19
-rw-r--r--mm/workingset.c10
19 files changed, 254 insertions, 278 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 22f4cd96acb0..afcc550877ff 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -76,8 +76,6 @@ config PAGE_POISONING_ZERO
no longer necessary to write zeros when GFP_ZERO is used on
allocation.
- Enabling page poisoning with this option will disable hibernation
-
If unsure, say N
bool
diff --git a/mm/debug.c b/mm/debug.c
index 8865bfb41b0b..74c7cae4f683 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = {
void __dump_page(struct page *page, const char *reason)
{
+ int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+
pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
- page, page_ref_count(page), page_mapcount(page),
- page->mapping, page->index);
+ page, page_ref_count(page), mapcount,
+ page->mapping, page_to_pgoff(page));
if (PageCompound(page))
pr_cont(" compound_mapcount: %d", compound_mapcount(page));
pr_cont("\n");
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287dfc5372..2d0986a64f1f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -110,6 +110,62 @@
* ->tasklist_lock (memory_failure, collect_procs_ao)
*/
+static int page_cache_tree_insert(struct address_space *mapping,
+ struct page *page, void **shadowp)
+{
+ struct radix_tree_node *node;
+ void **slot;
+ int error;
+
+ error = __radix_tree_create(&mapping->page_tree, page->index, 0,
+ &node, &slot);
+ if (error)
+ return error;
+ if (*slot) {
+ void *p;
+
+ p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+ if (!radix_tree_exceptional_entry(p))
+ return -EEXIST;
+
+ mapping->nrexceptional--;
+ if (!dax_mapping(mapping)) {
+ if (shadowp)
+ *shadowp = p;
+ if (node)
+ workingset_node_shadows_dec(node);
+ } else {
+ /* DAX can replace empty locked entry with a hole */
+ WARN_ON_ONCE(p !=
+ (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
+ RADIX_DAX_ENTRY_LOCK));
+ /* DAX accounts exceptional entries as normal pages */
+ if (node)
+ workingset_node_pages_dec(node);
+ /* Wakeup waiters for exceptional entry lock */
+ dax_wake_mapping_entry_waiter(mapping, page->index,
+ false);
+ }
+ }
+ radix_tree_replace_slot(slot, page);
+ mapping->nrpages++;
+ if (node) {
+ workingset_node_pages_inc(node);
+ /*
+ * Don't track node that contains actual pages.
+ *
+ * Avoid acquiring the list_lru lock if already
+ * untracked. The list_empty() test is safe as
+ * node->private_list is protected by
+ * mapping->tree_lock.
+ */
+ if (!list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ }
+ return 0;
+}
+
static void page_cache_tree_delete(struct address_space *mapping,
struct page *page, void *shadow)
{
@@ -561,7 +617,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
spin_lock_irqsave(&mapping->tree_lock, flags);
__delete_from_page_cache(old, NULL);
- error = radix_tree_insert(&mapping->page_tree, offset, new);
+ error = page_cache_tree_insert(mapping, new, NULL);
BUG_ON(error);
mapping->nrpages++;
@@ -584,62 +640,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
}
EXPORT_SYMBOL_GPL(replace_page_cache_page);
-static int page_cache_tree_insert(struct address_space *mapping,
- struct page *page, void **shadowp)
-{
- struct radix_tree_node *node;
- void **slot;
- int error;
-
- error = __radix_tree_create(&mapping->page_tree, page->index, 0,
- &node, &slot);
- if (error)
- return error;
- if (*slot) {
- void *p;
-
- p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
- if (!radix_tree_exceptional_entry(p))
- return -EEXIST;
-
- mapping->nrexceptional--;
- if (!dax_mapping(mapping)) {
- if (shadowp)
- *shadowp = p;
- if (node)
- workingset_node_shadows_dec(node);
- } else {
- /* DAX can replace empty locked entry with a hole */
- WARN_ON_ONCE(p !=
- (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
- RADIX_DAX_ENTRY_LOCK));
- /* DAX accounts exceptional entries as normal pages */
- if (node)
- workingset_node_pages_dec(node);
- /* Wakeup waiters for exceptional entry lock */
- dax_wake_mapping_entry_waiter(mapping, page->index,
- false);
- }
- }
- radix_tree_replace_slot(slot, page);
- mapping->nrpages++;
- if (node) {
- workingset_node_pages_inc(node);
- /*
- * Don't track node that contains actual pages.
- *
- * Avoid acquiring the list_lru lock if already
- * untracked. The list_empty() test is safe as
- * node->private_list is protected by
- * mapping->tree_lock.
- */
- if (!list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes,
- &node->private_list);
- }
- return 0;
-}
-
static int __add_to_page_cache_locked(struct page *page,
struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2db2112aa31e..283583fcb1e7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1078,7 +1078,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
goto out;
page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page), page);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
if (flags & FOLL_TOUCH)
touch_pmd(vma, addr, pmd);
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
@@ -1116,7 +1116,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
}
skip_mlock:
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
- VM_BUG_ON_PAGE(!PageCompound(page), page);
+ VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
if (flags & FOLL_GET)
get_page(page);
@@ -1138,9 +1138,6 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
bool was_writable;
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
if (unlikely(!pmd_same(pmd, *fe->pmd)))
goto out_unlock;
@@ -1168,7 +1165,7 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
}
/* See similar comment in do_numa_page for explanation */
- if (!(vma->vm_flags & VM_WRITE))
+ if (!pmd_write(pmd))
flags |= TNF_NO_GROUP;
/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79c52d0061af..728d7790dc2d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
* value (scan code).
*/
-static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
+static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+ struct vm_area_struct **vmap)
{
struct vm_area_struct *vma;
unsigned long hstart, hend;
@@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
if (unlikely(khugepaged_test_exit(mm)))
return SCAN_ANY_PROCESS;
- vma = find_vma(mm, address);
+ *vmap = vma = find_vma(mm, address);
if (!vma)
return SCAN_VMA_NULL;
@@ -881,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
.pmd = pmd,
};
+ /* we only decide to swapin, if there is enough young ptes */
+ if (referenced < HPAGE_PMD_NR/2) {
+ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+ return false;
+ }
fe.pte = pte_offset_map(pmd, address);
for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
fe.pte++, fe.address += PAGE_SIZE) {
@@ -888,17 +894,12 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
if (!is_swap_pte(pteval))
continue;
swapped_in++;
- /* we only decide to swapin, if there is enough young ptes */
- if (referenced < HPAGE_PMD_NR/2) {
- trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
- return false;
- }
ret = do_swap_page(&fe, pteval);
/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
if (ret & VM_FAULT_RETRY) {
down_read(&mm->mmap_sem);
- if (hugepage_vma_revalidate(mm, address)) {
+ if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
/* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
return false;
@@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
static void collapse_huge_page(struct mm_struct *mm,
unsigned long address,
struct page **hpage,
- struct vm_area_struct *vma,
int node, int referenced)
{
pmd_t *pmd, _pmd;
@@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spinlock_t *pmd_ptl, *pte_ptl;
int isolated = 0, result = 0;
struct mem_cgroup *memcg;
+ struct vm_area_struct *vma;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
gfp_t gfp;
@@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm,
}
down_read(&mm->mmap_sem);
- result = hugepage_vma_revalidate(mm, address);
+ result = hugepage_vma_revalidate(mm, address, &vma);
if (result) {
mem_cgroup_cancel_charge(new_page, memcg, true);
up_read(&mm->mmap_sem);
@@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm,
* handled by the anon_vma lock + PG_lock.
*/
down_write(&mm->mmap_sem);
- result = hugepage_vma_revalidate(mm, address);
+ result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
goto out;
/* check if the pmd is still valid */
@@ -1202,7 +1203,7 @@ out_unmap:
if (ret) {
node = khugepaged_find_target_node();
/* collapse_huge_page will return with the mmap_sem released */
- collapse_huge_page(mm, address, hpage, vma, node, referenced);
+ collapse_huge_page(mm, address, hpage, node, referenced);
}
out:
trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
diff --git a/mm/ksm.c b/mm/ksm.c
index 73d43bafd9fb..5048083b60f2 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -283,7 +283,8 @@ static inline struct rmap_item *alloc_rmap_item(void)
{
struct rmap_item *rmap_item;
- rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+ rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
+ __GFP_NORETRY | __GFP_NOWARN);
if (rmap_item)
ksm_rmap_items++;
return rmap_item;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9a6a51a7c416..4be518d4e68a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
struct memcg_stock_pcp *stock;
+ unsigned long flags;
bool ret = false;
if (nr_pages > CHARGE_BATCH)
return ret;
- stock = &get_cpu_var(memcg_stock);
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
stock->nr_pages -= nr_pages;
ret = true;
}
- put_cpu_var(memcg_stock);
+
+ local_irq_restore(flags);
+
return ret;
}
@@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
stock->cached = NULL;
}
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
static void drain_local_stock(struct work_struct *dummy)
{
- struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ stock = this_cpu_ptr(&memcg_stock);
drain_stock(stock);
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+ local_irq_restore(flags);
}
/*
@@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy)
*/
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
{
- struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+ struct memcg_stock_pcp *stock;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ stock = this_cpu_ptr(&memcg_stock);
if (stock->cached != memcg) { /* reset if necessary */
drain_stock(stock);
stock->cached = memcg;
}
stock->nr_pages += nr_pages;
- put_cpu_var(memcg_stock);
+
+ local_irq_restore(flags);
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index 83be99d9d8a1..f1a68049edff 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3351,9 +3351,6 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
bool was_writable = pte_write(pte);
int flags = 0;
- /* A PROT_NONE fault should not end up here */
- BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
/*
* The "pte" at this point cannot be used safely without
* validation through pte_unmap_same(). It's of NUMA type but
@@ -3398,7 +3395,7 @@ static int do_numa_page(struct fault_env *fe, pte_t pte)
* pte_dirty has unpredictable behaviour between PTE scan updates,
* background writeback, dirty balancing and application behaviour.
*/
- if (!(vma->vm_flags & VM_WRITE))
+ if (!pte_write(pte))
flags |= TNF_NO_GROUP;
/*
@@ -3458,6 +3455,11 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
return VM_FAULT_FALLBACK;
}
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -3524,7 +3526,7 @@ static int handle_pte_fault(struct fault_env *fe)
if (!pte_present(entry))
return do_swap_page(fe, entry);
- if (pte_protnone(entry))
+ if (pte_protnone(entry) && vma_is_accessible(fe->vma))
return do_numa_page(fe, entry);
fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd);
@@ -3590,7 +3592,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
barrier();
if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
- if (pmd_protnone(orig_pmd))
+ if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&fe, orig_pmd);
if ((fe.flags & FAULT_FLAG_WRITE) &&
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 41266dc29f33..9d29ba0f7192 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1555,8 +1555,8 @@ static struct page *new_node_page(struct page *page, unsigned long private,
{
gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
int nid = page_to_nid(page);
- nodemask_t nmask = node_online_map;
- struct page *new_page;
+ nodemask_t nmask = node_states[N_MEMORY];
+ struct page *new_page = NULL;
/*
* TODO: allocate a destination hugepage from a nearest neighbor node,
@@ -1568,11 +1568,13 @@ static struct page *new_node_page(struct page *page, unsigned long private,
next_node_in(nid, nmask));
node_clear(nid, nmask);
+
if (PageHighMem(page)
|| (zone_idx(page_zone(page)) == ZONE_MOVABLE))
gfp_mask |= __GFP_HIGHMEM;
- new_page = __alloc_pages_nodemask(gfp_mask, 0,
+ if (!nodes_empty(nmask))
+ new_page = __alloc_pages_nodemask(gfp_mask, 0,
node_zonelist(nid, gfp_mask), &nmask);
if (!new_page)
new_page = __alloc_pages(gfp_mask, 0,
diff --git a/mm/mmap.c b/mm/mmap.c
index ca9d91bca0d6..7a0707a48047 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,6 +88,11 @@ static void unmap_region(struct mm_struct *mm,
* w: (no) no w: (no) no w: (copy) copy w: (no) no
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
*
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE:
+ * r: (no) no
+ * w: (no) no
+ * x: (yes) yes
*/
pgprot_t protection_map[16] = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
@@ -3063,6 +3068,14 @@ out:
return ERR_PTR(ret);
}
+bool vma_is_special_mapping(const struct vm_area_struct *vma,
+ const struct vm_special_mapping *sm)
+{
+ return vma->vm_private_data == sm &&
+ (vma->vm_ops == &special_mapping_vmops ||
+ vma->vm_ops == &legacy_special_mapping_vmops);
+}
+
/*
* Called with mm->mmap_sem held for writing.
* Insert a new vma covering the given region, with the given flags.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f4cd7d8005c9..28d6f36a2d79 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2080,26 +2080,12 @@ void writeback_set_ratelimit(void)
ratelimit_pages = 16;
}
-static int
-ratelimit_handler(struct notifier_block *self, unsigned long action,
- void *hcpu)
+static int page_writeback_cpu_online(unsigned int cpu)
{
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_ONLINE:
- case CPU_DEAD:
- writeback_set_ratelimit();
- return NOTIFY_OK;
- default:
- return NOTIFY_DONE;
- }
+ writeback_set_ratelimit();
+ return 0;
}
-static struct notifier_block ratelimit_nb = {
- .notifier_call = ratelimit_handler,
- .next = NULL,
-};
-
/*
* Called early on to tune the page writeback dirty limits.
*
@@ -2122,8 +2108,10 @@ void __init page_writeback_init(void)
{
BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
- writeback_set_ratelimit();
- register_cpu_notifier(&ratelimit_nb);
+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
+ page_writeback_cpu_online, NULL);
+ cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
+ page_writeback_cpu_online);
}
/**
diff --git a/mm/page_io.c b/mm/page_io.c
index 16bd82fad38c..eafe5ddc2b54 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
int ret;
struct swap_info_struct *sis = page_swap_info(page);
+ BUG_ON(!PageSwapCache(page));
if (sis->flags & SWP_FILE) {
struct kiocb kiocb;
struct file *swap_file = sis->swap_file;
@@ -337,6 +338,7 @@ int swap_readpage(struct page *page)
int ret = 0;
struct swap_info_struct *sis = page_swap_info(page);
+ BUG_ON(!PageSwapCache(page));
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageUptodate(page), page);
if (frontswap_load(page) == 0) {
@@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page)
if (sis->flags & SWP_FILE) {
struct address_space *mapping = sis->swap_file->f_mapping;
+ BUG_ON(!PageSwapCache(page));
return mapping->a_ops->set_page_dirty(page);
} else {
return __set_page_dirty_no_writeback(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index fd8b2b5741b1..971fc83e6402 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -270,7 +270,7 @@ bool shmem_charge(struct inode *inode, long pages)
info->alloced -= pages;
shmem_recalc_inode(inode);
spin_unlock_irqrestore(&info->lock, flags);
-
+ shmem_unacct_blocks(info->flags, pages);
return false;
}
percpu_counter_add(&sbinfo->used_blocks, pages);
@@ -291,6 +291,7 @@ void shmem_uncharge(struct inode *inode, long pages)
if (sbinfo->max_blocks)
percpu_counter_sub(&sbinfo->used_blocks, pages);
+ shmem_unacct_blocks(info->flags, pages);
}
/*
@@ -1980,7 +1981,7 @@ unsigned long shmem_get_unmapped_area(struct file *file,
return addr;
sb = shm_mnt->mnt_sb;
}
- if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+ if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
return addr;
}
diff --git a/mm/slab.c b/mm/slab.c
index b67271024135..090fb26b3a39 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -886,6 +886,7 @@ static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
return 0;
}
+#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
/*
* Allocates and initializes node for a node on each slab cache, used for
* either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
@@ -908,6 +909,7 @@ static int init_cache_node_node(int node)
return 0;
}
+#endif
static int setup_kmem_cache_node(struct kmem_cache *cachep,
int node, gfp_t gfp, bool force_change)
@@ -975,6 +977,8 @@ fail:
return ret;
}
+#ifdef CONFIG_SMP
+
static void cpuup_canceled(long cpu)
{
struct kmem_cache *cachep;
@@ -1075,65 +1079,54 @@ bad:
return -ENOMEM;
}
-static int cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+int slab_prepare_cpu(unsigned int cpu)
{
- long cpu = (long)hcpu;
- int err = 0;
+ int err;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&slab_mutex);
- err = cpuup_prepare(cpu);
- mutex_unlock(&slab_mutex);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- start_cpu_timer(cpu);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DOWN_PREPARE:
- case CPU_DOWN_PREPARE_FROZEN:
- /*
- * Shutdown cache reaper. Note that the slab_mutex is
- * held so that if cache_reap() is invoked it cannot do
- * anything expensive but will only modify reap_work
- * and reschedule the timer.
- */
- cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
- /* Now the cache_reaper is guaranteed to be not running. */
- per_cpu(slab_reap_work, cpu).work.func = NULL;
- break;
- case CPU_DOWN_FAILED:
- case CPU_DOWN_FAILED_FROZEN:
- start_cpu_timer(cpu);
- break;
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /*
- * Even if all the cpus of a node are down, we don't free the
- * kmem_cache_node of any cache. This to avoid a race between
- * cpu_down, and a kmalloc allocation from another cpu for
- * memory from the node of the cpu going down. The node
- * structure is usually allocated from kmem_cache_create() and
- * gets destroyed at kmem_cache_destroy().
- */
- /* fall through */
+ mutex_lock(&slab_mutex);
+ err = cpuup_prepare(cpu);
+ mutex_unlock(&slab_mutex);
+ return err;
+}
+
+/*
+ * This is called for a failed online attempt and for a successful
+ * offline.
+ *
+ * Even if all the cpus of a node are down, we don't free the
+ * kmem_list3 of any cache. This to avoid a race between cpu_down, and
+ * a kmalloc allocation from another cpu for memory from the node of
+ * the cpu going down. The list3 structure is usually allocated from
+ * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
+ */
+int slab_dead_cpu(unsigned int cpu)
+{
+ mutex_lock(&slab_mutex);
+ cpuup_canceled(cpu);
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
#endif
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- mutex_lock(&slab_mutex);
- cpuup_canceled(cpu);
- mutex_unlock(&slab_mutex);
- break;
- }
- return notifier_from_errno(err);
+
+static int slab_online_cpu(unsigned int cpu)
+{
+ start_cpu_timer(cpu);
+ return 0;
}
-static struct notifier_block cpucache_notifier = {
- &cpuup_callback, NULL, 0
-};
+static int slab_offline_cpu(unsigned int cpu)
+{
+ /*
+ * Shutdown cache reaper. Note that the slab_mutex is held so
+ * that if cache_reap() is invoked it cannot do anything
+ * expensive but will only modify reap_work and reschedule the
+ * timer.
+ */
+ cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
+ /* Now the cache_reaper is guaranteed to be not running. */
+ per_cpu(slab_reap_work, cpu).work.func = NULL;
+ return 0;
+}
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
/*
@@ -1336,12 +1329,6 @@ void __init kmem_cache_init_late(void)
/* Done! */
slab_state = FULL;
- /*
- * Register a cpu startup notifier callback that initializes
- * cpu_cache_get for all new cpus
- */
- register_cpu_notifier(&cpucache_notifier);
-
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
@@ -1358,13 +1345,14 @@ void __init kmem_cache_init_late(void)
static int __init cpucache_init(void)
{
- int cpu;
+ int ret;
/*
* Register the timers that return unneeded pages to the page allocator
*/
- for_each_online_cpu(cpu)
- start_cpu_timer(cpu);
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
+ slab_online_cpu, slab_offline_cpu);
+ WARN_ON(ret < 0);
/* Done! */
slab_state = FULL;
diff --git a/mm/slub.c b/mm/slub.c
index 9adae58462f8..2b3e740609e9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -194,10 +194,6 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
#define __OBJECT_POISON 0x80000000UL /* Poison object */
#define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */
-#ifdef CONFIG_SMP
-static struct notifier_block slab_notifier;
-#endif
-
/*
* Tracking user of a slab.
*/
@@ -2305,6 +2301,25 @@ static void flush_all(struct kmem_cache *s)
}
/*
+ * Use the cpu notifier to insure that the cpu slabs are flushed when
+ * necessary.
+ */
+static int slub_cpu_dead(unsigned int cpu)
+{
+ struct kmem_cache *s;
+ unsigned long flags;
+
+ mutex_lock(&slab_mutex);
+ list_for_each_entry(s, &slab_caches, list) {
+ local_irq_save(flags);
+ __flush_cpu_slab(s, cpu);
+ local_irq_restore(flags);
+ }
+ mutex_unlock(&slab_mutex);
+ return 0;
+}
+
+/*
* Check if the objects in a per cpu structure fit numa
* locality expectations.
*/
@@ -4144,9 +4159,8 @@ void __init kmem_cache_init(void)
/* Setup random freelists for each cache */
init_freelist_randomization();
-#ifdef CONFIG_SMP
- register_cpu_notifier(&slab_notifier);
-#endif
+ cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
+ slub_cpu_dead);
pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
cache_line_size(),
@@ -4210,43 +4224,6 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
return err;
}
-#ifdef CONFIG_SMP
-/*
- * Use the cpu notifier to insure that the cpu slabs are flushed when
- * necessary.
- */
-static int slab_cpuup_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
-{
- long cpu = (long)hcpu;
- struct kmem_cache *s;
- unsigned long flags;
-
- switch (action) {
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list) {
- local_irq_save(flags);
- __flush_cpu_slab(s, cpu);
- local_irq_restore(flags);
- }
- mutex_unlock(&slab_mutex);
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block slab_notifier = {
- .notifier_call = slab_cpuup_callback
-};
-
-#endif
-
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 78cfa292a29a..2657accc6e2b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry)
struct swap_info_struct *page_swap_info(struct page *page)
{
swp_entry_t swap = { .val = page_private(page) };
- BUG_ON(!PageSwapCache(page));
return swap_info[swp_type(swap)];
}
diff --git a/mm/usercopy.c b/mm/usercopy.c
index a3cc3052f830..3c8da0af9695 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -134,31 +134,16 @@ static inline const char *check_bogus_address(const void *ptr, unsigned long n)
return NULL;
}
-static inline const char *check_heap_object(const void *ptr, unsigned long n,
- bool to_user)
+/* Checks for allocs that are marked in some way as spanning multiple pages. */
+static inline const char *check_page_span(const void *ptr, unsigned long n,
+ struct page *page, bool to_user)
{
- struct page *page, *endpage;
+#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
const void *end = ptr + n - 1;
+ struct page *endpage;
bool is_reserved, is_cma;
/*
- * Some architectures (arm64) return true for virt_addr_valid() on
- * vmalloced addresses. Work around this by checking for vmalloc
- * first.
- */
- if (is_vmalloc_addr(ptr))
- return NULL;
-
- if (!virt_addr_valid(ptr))
- return NULL;
-
- page = virt_to_head_page(ptr);
-
- /* Check slab allocator for flags and size. */
- if (PageSlab(page))
- return __check_heap_object(ptr, n, page);
-
- /*
* Sometimes the kernel data regions are not marked Reserved (see
* check below). And sometimes [_sdata,_edata) does not cover
* rodata and/or bss, so check each range explicitly.
@@ -186,7 +171,7 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
((unsigned long)end & (unsigned long)PAGE_MASK)))
return NULL;
- /* Allow if start and end are inside the same compound page. */
+ /* Allow if fully inside the same compound (__GFP_COMP) page. */
endpage = virt_to_head_page(end);
if (likely(endpage == page))
return NULL;
@@ -199,20 +184,47 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
is_reserved = PageReserved(page);
is_cma = is_migrate_cma_page(page);
if (!is_reserved && !is_cma)
- goto reject;
+ return "<spans multiple pages>";
for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
page = virt_to_head_page(ptr);
if (is_reserved && !PageReserved(page))
- goto reject;
+ return "<spans Reserved and non-Reserved pages>";
if (is_cma && !is_migrate_cma_page(page))
- goto reject;
+ return "<spans CMA and non-CMA pages>";
}
+#endif
return NULL;
+}
+
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+ bool to_user)
+{
+ struct page *page;
+
+ /*
+ * Some architectures (arm64) return true for virt_addr_valid() on
+ * vmalloced addresses. Work around this by checking for vmalloc
+ * first.
+ *
+ * We also need to check for module addresses explicitly since we
+ * may copy static data from modules to userspace
+ */
+ if (is_vmalloc_or_module_addr(ptr))
+ return NULL;
+
+ if (!virt_addr_valid(ptr))
+ return NULL;
+
+ page = virt_to_head_page(ptr);
+
+ /* Check slab allocator for flags and size. */
+ if (PageSlab(page))
+ return __check_heap_object(ptr, n, page);
-reject:
- return "<spans multiple pages>";
+ /* Verify object does not incorrectly span multiple pages. */
+ return check_page_span(ptr, n, page, to_user);
}
/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b1e12a1ea9cf..0fe8b7113868 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2303,23 +2303,6 @@ out:
}
}
-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void init_tlb_ubc(void)
-{
- /*
- * This deliberately does not clear the cpumask as it's expensive
- * and unnecessary. If there happens to be data in there then the
- * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and
- * then will be cleared.
- */
- current->tlb_ubc.flush_required = false;
-}
-#else
-static inline void init_tlb_ubc(void)
-{
-}
-#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-
/*
* This is a basic per-node page freer. Used by both kswapd and direct reclaim.
*/
@@ -2355,8 +2338,6 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
sc->priority == DEF_PRIORITY);
- init_tlb_ubc();
-
blk_start_plug(&plug);
while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
nr[LRU_INACTIVE_FILE]) {
diff --git a/mm/workingset.c b/mm/workingset.c
index 69551cfae97b..617475f529f4 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -418,21 +418,19 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
* no pages, so we expect to be able to remove them all and
* delete and free the empty node afterwards.
*/
-
- BUG_ON(!node->count);
- BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
+ BUG_ON(!workingset_node_shadows(node));
+ BUG_ON(workingset_node_pages(node));
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
if (node->slots[i]) {
BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
node->slots[i] = NULL;
- BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
- node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
+ workingset_node_shadows_dec(node);
BUG_ON(!mapping->nrexceptional);
mapping->nrexceptional--;
}
}
- BUG_ON(node->count);
+ BUG_ON(workingset_node_shadows(node));
inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
if (!__radix_tree_delete_node(&mapping->page_tree, node))
BUG();