 arch/Kconfig        |  5
 fs/fat/inode.c      | 19
 include/linux/mm.h  |  4
 mm/huge_memory.c    |  3
 mm/memory.c         | 35
 mm/memory_hotplug.c |  8
 mm/mprotect.c       | 38
 mm/z3fold.c         |  1
 8 files changed, 85 insertions(+), 28 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 98de654b79b3..17fe351cdde0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -738,8 +738,9 @@ config HAVE_STACK_VALIDATION
config HAVE_RELIABLE_STACKTRACE
bool
help
- Architecture has a save_stack_trace_tsk_reliable() function which
- only returns a stack trace if it can guarantee the trace is reliable.
+ Architecture has either a save_stack_trace_tsk_reliable() or an
+ arch_stack_walk_reliable() function, which only returns a stack trace
+ if it can guarantee the trace is reliable.
config HAVE_ARCH_HASH
bool
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 594b05ae16c9..71946da84388 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -750,6 +750,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
return NULL;
init_rwsem(&ei->truncate_lock);
+ /* Zeroing to allow iput() even on a partially initialized inode. */
+ ei->mmu_private = 0;
+ ei->i_start = 0;
+ ei->i_logstart = 0;
+ ei->i_attrs = 0;
+ ei->i_pos = 0;
+
return &ei->vfs_inode;
}
@@ -1374,16 +1381,6 @@ out:
return 0;
}
-static void fat_dummy_inode_init(struct inode *inode)
-{
- /* Initialize this dummy inode to work as no-op. */
- MSDOS_I(inode)->mmu_private = 0;
- MSDOS_I(inode)->i_start = 0;
- MSDOS_I(inode)->i_logstart = 0;
- MSDOS_I(inode)->i_attrs = 0;
- MSDOS_I(inode)->i_pos = 0;
-}
-
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1844,13 +1841,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
- fat_dummy_inode_init(fat_inode);
sbi->fat_inode = fat_inode;
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
- fat_dummy_inode_init(fsinfo_inode);
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
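
The fs/fat hunks above move the zeroing of the dummy-inode fields into fat_alloc_inode(), so the generic error paths can iput() an inode even when fat_fill_super() bails out before any further setup has run. Below is a minimal userspace sketch of that idea; demo_inode, demo_alloc and demo_release are hypothetical stand-ins, not the kernel API.

/*
 * Illustrative sketch only: zeroing at allocation time makes teardown
 * safe on a partially set up object.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_inode {
	long i_start;     /* stands in for MSDOS_I(inode)->i_start */
	long i_logstart;  /* stands in for MSDOS_I(inode)->i_logstart */
	long mmu_private; /* stands in for MSDOS_I(inode)->mmu_private */
};

static struct demo_inode *demo_alloc(void)
{
	struct demo_inode *di = malloc(sizeof(*di));

	if (!di)
		return NULL;
	/* Zero everything up front, mirroring the fat_alloc_inode() change. */
	memset(di, 0, sizeof(*di));
	return di;
}

/* Teardown consults the fields; with them zeroed it is a safe no-op. */
static void demo_release(struct demo_inode *di)
{
	if (!di)
		return;
	if (di->i_start)
		printf("would free clusters starting at %ld\n", di->i_start);
	else
		printf("nothing allocated, teardown is a no-op\n");
	free(di);
}

int main(void)
{
	/* Error path: allocation succeeded but the "fill" step never ran. */
	struct demo_inode *di = demo_alloc();

	demo_release(di);
	return 0;
}
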
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52269e56c514..c54fb96cb1e6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2715,6 +2715,10 @@ static inline bool debug_pagealloc_enabled_static(void)
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+/*
+ * When called in DEBUG_PAGEALLOC context, the call should most likely be
+ * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static()
+ */
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
{
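
The new comment in include/linux/mm.h asks callers running in a DEBUG_PAGEALLOC context to guard kernel_map_pages() with debug_pagealloc_enabled() or debug_pagealloc_enabled_static(), so the mapping work is only done when the feature is actually on. A rough userspace sketch of that guard pattern, with a plain boolean standing in for the kernel's static key (all names hypothetical):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool debug_alloc_enabled;

/* Debug-only work, e.g. changing page protections. */
static void debug_map_pages(void *page, int numpages, int enable)
{
	printf("%s %d page(s) at %p\n",
	       enable ? "mapping" : "unmapping", numpages, page);
}

static void online_page(void *page, int numpages)
{
	/* Only pay for the debug path when it is enabled. */
	if (debug_alloc_enabled)
		debug_map_pages(page, numpages, 1);
	/* ... hand the page(s) to the allocator ... */
}

int main(void)
{
	static char page[4096];

	debug_alloc_enabled = getenv("DEBUG_ALLOC") != NULL;
	online_page(page, 1);
	return 0;
}
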
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b08b199f9a11..24ad53b4dfc0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3043,8 +3043,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
return;
flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
- pmdval = *pvmw->pmd;
- pmdp_invalidate(vma, address, pvmw->pmd);
+ pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
if (pmd_dirty(pmdval))
set_page_dirty(page);
entry = make_migration_entry(page, pmd_write(pmdval));
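
The mm/huge_memory.c change takes the old PMD value from pmdp_invalidate() itself instead of a separate read, so dirty/accessed bits that the hardware sets between the read and the invalidation cannot be lost. A small userspace sketch of why a combined read-and-clear matters, with a C11 atomic standing in for the page-table entry (names hypothetical; compile with -std=c11):

#include <stdatomic.h>
#include <stdio.h>

#define DIRTY   0x1u
#define PRESENT 0x2u

static _Atomic unsigned int fake_pmd = PRESENT;

/* Racy variant: the value is read before the entry is torn down. */
static unsigned int racy_invalidate(void)
{
	unsigned int old = atomic_load(&fake_pmd); /* DIRTY may arrive after this */
	atomic_store(&fake_pmd, 0);                /* ...and is then wiped out */
	return old;
}

/* What a pmdp_invalidate()-style API gives you: read + clear in one step. */
static unsigned int atomic_invalidate(void)
{
	return atomic_exchange(&fake_pmd, 0);
}

int main(void)
{
	unsigned int old;

	atomic_fetch_or(&fake_pmd, DIRTY);
	old = atomic_invalidate();
	if (old & DIRTY)
		printf("exchange path: dirty bit preserved\n");

	atomic_store(&fake_pmd, PRESENT | DIRTY);
	old = racy_invalidate();
	printf("racy path read 0x%x (a late dirty bit could be missed)\n", old);
	return 0;
}
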
diff --git a/mm/memory.c b/mm/memory.c
index 0bccc622e482..e8bfdf0d9d1d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2257,7 +2257,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
bool ret;
void *kaddr;
void __user *uaddr;
- bool force_mkyoung;
+ bool locked = false;
struct vm_area_struct *vma = vmf->vma;
struct mm_struct *mm = vma->vm_mm;
unsigned long addr = vmf->address;
@@ -2282,11 +2282,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
* On architectures with software "accessed" bits, we would
* take a double page fault, so mark it accessed here.
*/
- force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte);
- if (force_mkyoung) {
+ if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
pte_t entry;
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+ locked = true;
if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
/*
* Other thread has already handled the fault
@@ -2310,18 +2310,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src,
* zeroes.
*/
if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+ if (locked)
+ goto warn;
+
+ /* Re-validate under PTL if the page is still mapped */
+ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+ locked = true;
+ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
+ /* The PTE changed under us. Retry page fault. */
+ ret = false;
+ goto pte_unlock;
+ }
+
/*
- * Give a warn in case there can be some obscure
- * use-case
+ * The same page can be mapped back since the last copy attempt.
+ * Try to copy again under PTL.
*/
- WARN_ON_ONCE(1);
- clear_page(kaddr);
+ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
+ /*
+ * Give a warning in case there is some obscure
+ * use case
+ */
+warn:
+ WARN_ON_ONCE(1);
+ clear_page(kaddr);
+ }
}
ret = true;
pte_unlock:
- if (force_mkyoung)
+ if (locked)
pte_unmap_unlock(vmf->pte, vmf->ptl);
kunmap_atomic(kaddr);
flush_dcache_page(dst);
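
The mm/memory.c hunks make cow_user_page() retry a failed user copy under the page-table lock: take the PTL, re-check that the PTE is unchanged, copy again, and only warn (and zero the page) if the locked retry also fails. A simplified userspace sketch of that retry-under-lock pattern, with a pthread mutex standing in for the PTL and a flag for "is the source still mapped" (all names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;
static bool src_mapped = true;  /* a concurrent "unmap" could clear this */
static char src[64] = "source data";

/* May fail if the source is unmapped under us. */
static bool try_copy(char *dst)
{
	if (!src_mapped)
		return false;
	memcpy(dst, src, sizeof(src));
	return true;
}

static bool cow_copy(char *dst)
{
	if (try_copy(dst))
		return true;                    /* fast path: no lock taken */

	/* Slow path: re-validate and retry under the lock. */
	pthread_mutex_lock(&ptl);
	if (!src_mapped) {
		pthread_mutex_unlock(&ptl);
		return false;                   /* mapping changed: retry the fault */
	}
	if (!try_copy(dst)) {
		/* Still failing with the lock held: genuinely unexpected. */
		fprintf(stderr, "warning: copy failed under lock\n");
		memset(dst, 0, sizeof(src));
	}
	pthread_mutex_unlock(&ptl);
	return true;
}

int main(void)
{
	char dst[64];

	printf("copied: %d\n", cow_copy(dst));
	return 0;
}
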
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0a54ffac8c68..19389cdc16a5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -574,7 +574,13 @@ EXPORT_SYMBOL_GPL(restore_online_page_callback);
void generic_online_page(struct page *page, unsigned int order)
{
- kernel_map_pages(page, 1 << order, 1);
+ /*
+ * Freeing the page with debug_pagealloc enabled will try to unmap it,
+ * so we should map it first. This is better than introducing a special
+ * case in the page freeing fast path.
+ */
+ if (debug_pagealloc_enabled_static())
+ kernel_map_pages(page, 1 << order, 1);
__free_pages_core(page, order);
totalram_pages_add(1UL << order);
#ifdef CONFIG_HIGHMEM
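
generic_online_page() now maps the page first when debug_pagealloc is enabled, because the subsequent free will touch the page before unmapping it again. A userspace sketch of that map-before-free ordering, with mprotect() standing in for the debug_pagealloc mapping (names hypothetical; Linux-only):

#define _DEFAULT_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static bool debug_pagealloc = true;

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	char *page = mmap(NULL, psize, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (page == MAP_FAILED)
		return 1;

	/* Debug mode keeps pages that are not in use inaccessible. */
	if (debug_pagealloc)
		mprotect(page, psize, PROT_NONE);

	/*
	 * Onlining: the freeing code is about to touch the page (and will
	 * unmap it again itself), so map it first, as the hunk above does.
	 */
	if (debug_pagealloc)
		mprotect(page, psize, PROT_READ | PROT_WRITE);

	memset(page, 0, psize);  /* would fault if the page were still PROT_NONE */
	munmap(page, psize);
	printf("page onlined and freed without faulting\n");
	return 0;
}
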
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 7a8e84f86831..311c0dadf71c 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -161,6 +161,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
return pages;
}
+/*
+ * Used when setting automatic NUMA hinting protection where it is
+ * critical that a numa hinting PMD is not confused with a bad PMD.
+ */
+static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
+{
+ pmd_t pmdval = pmd_read_atomic(pmd);
+
+ /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ barrier();
+#endif
+
+ if (pmd_none(pmdval))
+ return 1;
+ if (pmd_trans_huge(pmdval))
+ return 0;
+ if (unlikely(pmd_bad(pmdval))) {
+ pmd_clear_bad(pmd);
+ return 1;
+ }
+
+ return 0;
+}
+
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
pud_t *pud, unsigned long addr, unsigned long end,
pgprot_t newprot, int dirty_accountable, int prot_numa)
@@ -178,8 +203,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
unsigned long this_pages;
next = pmd_addr_end(addr, end);
- if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
- && pmd_none_or_clear_bad(pmd))
+
+ /*
+ * Automatic NUMA balancing walks the tables with mmap_sem
+ * held for read. It's possible for a parallel update to occur
+ * between the pmd_trans_huge() and pmd_none_or_clear_bad()
+ * checks, leading to a false positive and clearing.
+ * Hence, it's necessary to atomically read the PMD value
+ * for all the checks.
+ */
+ if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
+ pmd_none_or_clear_bad_unless_trans_huge(pmd))
goto next;
/* invoke the mmu notifier if the pmd is populated */
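
pmd_none_or_clear_bad_unless_trans_huge() reads the PMD once with pmd_read_atomic() and runs every check against that single snapshot, so a huge-page entry being rewritten in parallel by NUMA balancing cannot be mistaken for a bad PMD. A userspace sketch of the snapshot-then-check idea, with a C11 atomic standing in for the PMD entry (names hypothetical; compile with -std=c11):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define ENTRY_NONE 0x0ul
#define ENTRY_HUGE 0x1ul
#define ENTRY_BAD  0x2ul

static _Atomic unsigned long shared_entry = ENTRY_HUGE;

/* Racy: every check re-reads the entry, which may change in between. */
static bool none_or_bad_racy(void)
{
	if (atomic_load(&shared_entry) == ENTRY_NONE)
		return true;
	if (atomic_load(&shared_entry) & ENTRY_HUGE) /* may see a different value */
		return false;
	return atomic_load(&shared_entry) & ENTRY_BAD;
}

/* Safe: read once, then test the same snapshot everywhere. */
static bool none_or_bad_unless_huge(void)
{
	unsigned long val = atomic_load(&shared_entry);

	if (val == ENTRY_NONE)
		return true;
	if (val & ENTRY_HUGE)
		return false;
	return val & ENTRY_BAD;
}

int main(void)
{
	printf("snapshot check: %d\n", none_or_bad_unless_huge());
	printf("racy check:     %d\n", none_or_bad_racy());
	return 0;
}
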
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 43754d8ebce8..42f31c4b53ad 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -41,7 +41,6 @@
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/rwlock.h>
#include <linux/zpool.h>
#include <linux/magic.h>