x86/mm/tlb: Flush remote and local TLBs concurrently

To improve TLB shootdown performance, flush the remote and local TLBs concurrently. Introduce flush_tlb_multi() that does so. Introduce paravirtual versions of flush_tlb_multi() for KVM, Xen and hyper-v (Xen and hyper-v are only compile-tested). While the updated smp infrastructure is capable of running a function on a single local core, it is not optimized for this case. The multiple function calls and the indirect branch introduce some overhead, and might make local TLB flushes slower than they were before the recent changes. Before calling the SMP infrastructure, check if only a local TLB flush is needed to restore the lost performance in this common case. This requires checking mm_cpumask() one more time, but unless this mask is updated very frequently, this should not impact performance negatively. Signed-off-by: Nadav Amit <namit@vmware.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Michael Kelley <mikelley@microsoft.com> # Hyper-v parts Reviewed-by: Juergen Gross <jgross@suse.com> # Xen and paravirt parts Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com> Link: https://lore.kernel.org/r/20210220231712.2475218-5-namit@vmware.com
author: Nadav Amit <namit@vmware.com> 2021-02-20 15:17:07 -0800
committer: Ingo Molnar <mingo@kernel.org> 2021-03-06 12:59:10 +0100
commit: 4ce94eabac16b1d2c95762b40f49e5654ab288d7 (patch)
tree: 16fd5d3124ad8ae61e9c22fecc24f9c91adec4e9 /arch/x86/kernel/kvm.c
parent: x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy() (diff)
download: linux-dev-4ce94eabac16b1d2c95762b40f49e5654ab288d7.tar.xz
linux-dev-4ce94eabac16b1d2c95762b40f49e5654ab288d7.zip
1 files changed, 8 insertions, 3 deletions
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5e78e01ca3b4..38ea9dee2456 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -613,7 +613,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 }
 #endif
 
-static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
 			const struct flush_tlb_info *info)
 {
 	u8 state;
@@ -627,6 +627,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
 	 * queue flush_on_enter for pre-empted vCPUs
 	 */
 	for_each_cpu(cpu, flushmask) {
+		/*
+		 * The local vCPU is never preempted, so we do not
+		 * explicitly skip the check for the local vCPU - it
+		 * will never be cleared from flushmask.
+		 */
 		src = &per_cpu(steal_time, cpu);
 		state = READ_ONCE(src->preempted);
 		if ((state & KVM_VCPU_PREEMPTED)) {
@@ -636,7 +641,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
 		}
 	}
 
-	native_flush_tlb_others(flushmask, info);
+	native_flush_tlb_multi(flushmask, info);
 }
 
 static void __init kvm_guest_init(void)
@@ -654,7 +659,7 @@ static void __init kvm_guest_init(void)
 	}
 
 	if (pv_tlb_flush_supported()) {
-		pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
+		pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
 		pv_ops.mmu.tlb_remove_table = tlb_remove_table;
 		pr_info("KVM setup pv remote TLB flush\n");
 	}
author	Nadav Amit <namit@vmware.com>	2021-02-20 15:17:07 -0800
committer	Ingo Molnar <mingo@kernel.org>	2021-03-06 12:59:10 +0100
commit	4ce94eabac16b1d2c95762b40f49e5654ab288d7 (patch)
tree	16fd5d3124ad8ae61e9c22fecc24f9c91adec4e9 /arch/x86/kernel/kvm.c
parent	x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy() (diff)
download	linux-dev-4ce94eabac16b1d2c95762b40f49e5654ab288d7.tar.xz linux-dev-4ce94eabac16b1d2c95762b40f49e5654ab288d7.zip