aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/include/asm/mmu_context.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm/mmu_context.h')
-rw-r--r--arch/x86/include/asm/mmu_context.h53
1 files changed, 46 insertions, 7 deletions
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 40269a2bf6f9..883f6b933fa4 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -18,6 +18,21 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
}
#endif /* !CONFIG_PARAVIRT */
+#ifdef CONFIG_PERF_EVENTS
+extern struct static_key rdpmc_always_available;
+
+static inline void load_mm_cr4(struct mm_struct *mm)
+{
+ if (static_key_true(&rdpmc_always_available) ||
+ atomic_read(&mm->context.perf_rdpmc_allowed))
+ cr4_set_bits(X86_CR4_PCE);
+ else
+ cr4_clear_bits(X86_CR4_PCE);
+}
+#else
+static inline void load_mm_cr4(struct mm_struct *mm) {}
+#endif
+
/*
* Used for LDT copy/destruction.
*/
@@ -52,15 +67,20 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
/* Stop flush ipis for the previous mm */
cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ /* Load per-mm CR4 state */
+ load_mm_cr4(next);
+
/*
* Load the LDT, if the LDT is different.
*
- * It's possible leave_mm(prev) has been called. If so,
- * then prev->context.ldt could be out of sync with the
- * LDT descriptor or the LDT register. This can only happen
- * if prev->context.ldt is non-null, since we never free
- * an LDT. But LDTs can't be shared across mms, so
- * prev->context.ldt won't be equal to next->context.ldt.
+ * It's possible that prev->context.ldt doesn't match
+ * the LDT register. This can happen if leave_mm(prev)
+ * was called and then modify_ldt changed
+ * prev->context.ldt but suppressed an IPI to this CPU.
+ * In this case, prev->context.ldt != NULL, because we
+ * never free an LDT while the mm still exists. That
+ * means that next->context.ldt != prev->context.ldt,
+ * because mms never share an LDT.
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
@@ -85,6 +105,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+ load_mm_cr4(next);
load_LDT_nolock(&next->context);
}
}
@@ -130,7 +151,25 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- mpx_notify_unmap(mm, vma, start, end);
+ /*
+ * mpx_notify_unmap() goes and reads a rarely-hot
+ * cacheline in the mm_struct. That can be expensive
+ * enough to be seen in profiles.
+ *
+ * The mpx_notify_unmap() call and its contents have been
+ * observed to affect munmap() performance on hardware
+ * where MPX is not present.
+ *
+ * The unlikely() optimizes for the fast case: no MPX
+ * in the CPU, or no MPX use in the process. Even if
+ * we get this wrong (in the unlikely event that MPX
+ * is widely enabled on some system) the overhead of
+ * MPX itself (reading bounds tables) is expected to
+ * overwhelm the overhead of getting this unlikely()
+ * consistently wrong.
+ */
+ if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
+ mpx_notify_unmap(mm, vma, start, end);
}
#endif /* _ASM_X86_MMU_CONTEXT_H */