From 99dc26bda233ee722bbd370bddf20beece3ffb93 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 23 Sep 2017 15:00:03 +0200 Subject: x86/fpu: Remove struct fpu::fpregs_active The previous changes paved the way for the removal of the fpu::fpregs_active state flag - we now only have the fpu::fpstate_active and fpu::last_cpu fields left. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-21-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'arch/x86/include/asm/fpu/types.h') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3c80f5b9c09d..0c314a397cf5 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -298,29 +298,6 @@ struct fpu { */ unsigned char fpstate_active; - /* - * @fpregs_active: - * - * This flag determines whether a given context is actively - * loaded into the FPU's registers and that those registers - * represent the task's current FPU state. - * - * Note the interaction with fpstate_active: - * - * # task does not use the FPU: - * fpstate_active == 0 - * - * # task uses the FPU and regs are active: - * fpstate_active == 1 && fpregs_active == 1 - * - * # the regs are inactive but still match fpstate: - * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu - * - * The third state is what we use for the lazy restore optimization - * on lazy-switching CPUs. - */ - unsigned char fpregs_active; - /* * @state: * -- cgit v1.2.3-59-g8ed1b From 0852b374173bb57f870d78e6c6839c77b339be5f Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 23 Sep 2017 15:00:04 +0200 Subject: x86/fpu: Add FPU state copying quirk to handle XRSTOR failure on Intel Skylake CPUs On Skylake CPUs I noticed that XRSTOR is unable to deal with states created by copyout_from_xsaves() if the xstate has only SSE/YMM state, and no FP state. That is, xfeatures had XFEATURE_MASK_SSE set, but not XFEATURE_MASK_FP. The reason is that part of the SSE/YMM state lives in the MXCSR and MXCSR_FLAGS fields of the FP state. Ensure that whenever we copy SSE or YMM state around, the MXCSR and MXCSR_FLAGS fields are also copied around. Signed-off-by: Rik van Riel Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170210085445.0f1cc708@annuminas.surriel.com Link: http://lkml.kernel.org/r/20170923130016.21448-22-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 3 +++ arch/x86/kernel/fpu/xstate.c | 42 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'arch/x86/include/asm/fpu/types.h') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 0c314a397cf5..71db45ca8870 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -68,6 +68,9 @@ struct fxregs_state { /* Default value for fxregs_state.mxcsr: */ #define MXCSR_DEFAULT 0x1f80 +/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */ +#define MXCSR_AND_FLAGS_SIZE sizeof(u64) + /* * Software based FPU emulation state. This is arbitrary really, * it matches the x87 format to make it easier to understand: diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0ef35040d0ad..41c52256bdce 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -920,6 +920,23 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, } #endif /* ! CONFIG_ARCH_HAS_PKEYS */ +/* + * Weird legacy quirk: SSE and YMM states store information in the + * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP + * area is marked as unused in the xfeatures header, we need to copy + * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use. + */ +static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) +{ + if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM))) + return 0; + + if (xfeatures & XFEATURE_MASK_FP) + return 0; + + return 1; +} + /* * This is similar to user_regset_copyout(), but will not add offset to * the source data pointer or increment pos, count, kbuf, and ubuf. @@ -988,6 +1005,12 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of } + if (xfeatures_mxcsr_quirk(header.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total); + } + /* * Fill xsave->i387.sw_reserved value for ptrace frame: */ @@ -1070,6 +1093,12 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i } + if (xfeatures_mxcsr_quirk(header.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total); + } + /* * Fill xsave->i387.sw_reserved value for ptrace frame: */ @@ -1122,6 +1151,12 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) } } + if (xfeatures_mxcsr_quirk(xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + memcpy(&xsave->i387.mxcsr, kbuf + offset, size); + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': @@ -1177,6 +1212,13 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) } } + if (xfeatures_mxcsr_quirk(xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size)) + return -EFAULT; + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': -- cgit v1.2.3-59-g8ed1b From e4a81bfcaae1ebbdc6efe74e8ea563144d90e9a9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Sep 2017 09:43:36 +0200 Subject: x86/fpu: Rename fpu::fpstate_active to fpu::initialized The x86 FPU code used to have a complex state machine where both the FPU registers and the FPU state context could be 'active' (or inactive) independently of each other - which enabled features like lazy FPU restore. Much of this complexity is gone in the current code: now we basically can have FPU-less tasks (kernel threads) that don't use (and save/restore) FPU state at all, plus full FPU users that save/restore directly with no laziness whatsoever. But the fpu::fpstate_active still carries bits of the old complexity - meanwhile this flag has become a simple flag that shows whether the FPU context saving area in the thread struct is initialized and used, or not. Rename it to fpu::initialized to express this simplicity in the name as well. Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Eric Biggers Cc: Fenghua Yu Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/20170923130016.21448-30-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 2 +- arch/x86/include/asm/fpu/internal.h | 4 ++-- arch/x86/include/asm/fpu/types.h | 6 +++--- arch/x86/include/asm/trace/fpu.h | 8 ++++---- arch/x86/kernel/fpu/core.c | 24 ++++++++++++------------ arch/x86/kernel/fpu/init.c | 2 +- arch/x86/kernel/fpu/regset.c | 6 +++--- arch/x86/kernel/fpu/signal.c | 8 ++++---- arch/x86/kernel/fpu/xstate.c | 2 +- arch/x86/kernel/signal.c | 6 +++--- arch/x86/mm/pkeys.c | 2 +- 11 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/x86/include/asm/fpu/types.h') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index e0bb46c02857..0e2a5edbce00 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (fpu->fpstate_active) { + if (fpu->initialized) { unsigned long fx_aligned, math_size; sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 508e4181c4af..b26ae05da18a 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -527,7 +527,7 @@ static inline void fpregs_activate(struct fpu *fpu) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->fpstate_active) { + if (old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -550,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) { bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->fpstate_active; + new_fpu->initialized; if (preload) { if (!fpregs_state_valid(new_fpu, cpu)) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 71db45ca8870..a1520575d86b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -293,13 +293,13 @@ struct fpu { unsigned int last_cpu; /* - * @fpstate_active: + * @initialized: * - * This flag indicates whether this context is active: if the task + * This flag indicates whether this context is initialized: if the task * is not running then we can restore from this context, if the task * is running then we should save into this context. */ - unsigned char fpstate_active; + unsigned char initialized; /* * @state: diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index da565aae9fd2..39f7a27bef13 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -12,22 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, fpstate_active) + __field(bool, initialized) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->fpstate_active = fpu->fpstate_active; + __entry->initialized = fpu->initialized; if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->fpstate_active, + __entry->initialized, __entry->xfeatures, __entry->xcomp_bv ) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b2cdeb3b1860..c8d6032f04d0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -100,7 +100,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->fpstate_active) { + if (fpu->initialized) { /* * Ignore return value -- we don't care if reg state * is clobbered. @@ -116,7 +116,7 @@ void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu; - if (fpu->fpstate_active) + if (fpu->initialized) copy_kernel_to_fpregs(&fpu->state); kernel_fpu_enable(); @@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu) preempt_disable(); trace_x86_fpu_before_save(fpu); - if (fpu->fpstate_active) { + if (fpu->initialized) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } @@ -191,7 +191,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { dst_fpu->last_cpu = -1; - if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) + if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -240,13 +240,13 @@ void fpu__activate_curr(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } EXPORT_SYMBOL_GPL(fpu__activate_curr); @@ -271,13 +271,13 @@ void fpu__activate_fpstate_read(struct fpu *fpu) if (fpu == ¤t->thread.fpu) { fpu__save(fpu); } else { - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } } @@ -303,7 +303,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu) */ WARN_ON_FPU(fpu == ¤t->thread.fpu); - if (fpu->fpstate_active) { + if (fpu->initialized) { /* Invalidate any lazy state: */ __fpu_invalidate_fpregs_state(fpu); } else { @@ -312,7 +312,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu) trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } @@ -354,7 +354,7 @@ void fpu__drop(struct fpu *fpu) preempt_disable(); if (fpu == ¤t->thread.fpu) { - if (fpu->fpstate_active) { + if (fpu->initialized) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" @@ -363,7 +363,7 @@ void fpu__drop(struct fpu *fpu) } } - fpu->fpstate_active = 0; + fpu->initialized = 0; trace_x86_fpu_dropped(fpu); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d5d44c452624..7affb7e3d9a5 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - WARN_ON_FPU(current->thread.fpu.fpstate_active); + WARN_ON_FPU(current->thread.fpu.initialized); } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index c764f7405322..19e82334e811 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r { struct fpu *target_fpu = &target->thread.fpu; - return target_fpu->fpstate_active ? regset->n : 0; + return target_fpu->initialized ? regset->n : 0; } int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { struct fpu *target_fpu = &target->thread.fpu; - if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) + if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized) return regset->n; else return 0; @@ -380,7 +380,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) struct fpu *fpu = &tsk->thread.fpu; int fpvalid; - fpvalid = fpu->fpstate_active; + fpvalid = fpu->initialized; if (fpvalid) fpvalid = !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index da68ea1c3a44..ab2dd24cfea4 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -171,7 +171,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpu->fpstate_active || using_compacted_format()) { + if (fpu->initialized || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; @@ -315,12 +315,12 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int err = 0; /* - * Drop the current fpu which clears fpu->fpstate_active. This ensures + * Drop the current fpu which clears fpu->initialized. This ensures * that any context-switch during the copy of the new state, * avoids the intermediate state from getting restored/saved. * Thus avoiding the new restored state from getting corrupted. * We will be ready to restore/save the state only after - * fpu->fpstate_active is again set. + * fpu->initialized is again set. */ fpu__drop(fpu); @@ -342,7 +342,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } - fpu->fpstate_active = 1; + fpu->initialized = 1; preempt_disable(); fpu__restore(fpu); preempt_enable(); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index fda1109cc355..703e76d027ee 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -867,7 +867,7 @@ const void *get_xsave_field_ptr(int xsave_state) { struct fpu *fpu = ¤t->thread.fpu; - if (!fpu->fpstate_active) + if (!fpu->initialized) return NULL; /* * fpu__save() takes the CPU's xstate registers diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e04442345fc0..4e188fda5961 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = (unsigned long) ka->sa.sa_restorer; } - if (fpu->fpstate_active) { + if (fpu->initialized) { sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), &buf_fx, &math_size); *fpstate = (void __user *)sp; @@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (fpu->fpstate_active && + if (fpu->initialized && copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; @@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (fpu->fpstate_active) + if (fpu->initialized) fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 4d24269c071f..d7bc0eea20a5 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -44,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm) */ preempt_disable(); if (!need_to_set_mm_pkey && - current->thread.fpu.fpstate_active && + current->thread.fpu.initialized && !__pkru_allows_read(read_pkru(), execute_only_pkey)) { preempt_enable(); return execute_only_pkey; -- cgit v1.2.3-59-g8ed1b