aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
authorRik van Riel <riel@surriel.com>2018-07-16 15:03:32 -0400
committerIngo Molnar <mingo@kernel.org>2018-07-17 09:35:31 +0200
commit2ff6ddf19c0ec40633bd14d8fe28a289816bd98d (patch)
treee608a4aa5331e3fcd5a1b00a6c65de41b6563eb5 /mm/memory.c
parentmm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on nr_cpu_ids (diff)
downloadlinux-dev-2ff6ddf19c0ec40633bd14d8fe28a289816bd98d.tar.xz
linux-dev-2ff6ddf19c0ec40633bd14d8fe28a289816bd98d.zip
x86/mm/tlb: Leave lazy TLB mode at page table free time
Andy discovered that speculative memory accesses while in lazy TLB mode can crash a system, when a CPU tries to dereference a speculative access using memory contents that used to be valid page table memory, but have since been reused for something else and point into la-la land. The latter problem can be prevented in two ways. The first is to always send a TLB shootdown IPI to CPUs in lazy TLB mode, while the second one is to only send the TLB shootdown at page table freeing time. The second should result in fewer IPIs, since operationgs like mprotect and madvise are very common with some workloads, but do not involve page table freeing. Also, on munmap, batching of page table freeing covers much larger ranges of virtual memory than the batching of unmapped user pages. Tested-by: Song Liu <songliubraving@fb.com> Signed-off-by: Rik van Riel <riel@surriel.com> Acked-by: Dave Hansen <dave.hansen@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: efault@gmx.de Cc: kernel-team@fb.com Cc: luto@kernel.org Link: http://lkml.kernel.org/r/20180716190337.26133-3-riel@surriel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to '')
-rw-r--r--mm/memory.c22
1 files changed, 14 insertions, 8 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..18355e0b971a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-/*
- * See the comment near struct mmu_table_batch.
- */
-
static void tlb_remove_table_smp_sync(void *arg)
{
- /* Simply deliver the interrupt */
+ struct mm_struct __maybe_unused *mm = arg;
+ /*
+ * On most architectures this does nothing. Simply delivering the
+ * interrupt is enough to prevent races with software page table
+ * walking like that done in get_user_pages_fast.
+ *
+ * See the comment near struct mmu_table_batch.
+ */
+ tlb_flush_remove_tables_local(mm);
}
-static void tlb_remove_table_one(void *table)
+static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
* It is however sufficient for software page-table walkers that rely on
* IRQ disabling. See the comment near struct mmu_table_batch.
*/
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
__tlb_remove_table(table);
}
@@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
{
struct mmu_table_batch **batch = &tlb->batch;
+ tlb_flush_remove_tables(tlb->mm);
+
if (*batch) {
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
@@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
- tlb_remove_table_one(table);
+ tlb_remove_table_one(table, tlb);
return;
}
(*batch)->nr = 0;