aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/x86/kernel/fpu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/fpu')
-rw-r--r--arch/x86/kernel/fpu/context.h4
-rw-r--r--arch/x86/kernel/fpu/core.c141
-rw-r--r--arch/x86/kernel/fpu/init.c21
-rw-r--r--arch/x86/kernel/fpu/internal.h2
-rw-r--r--arch/x86/kernel/fpu/regset.c25
-rw-r--r--arch/x86/kernel/fpu/signal.c47
-rw-r--r--arch/x86/kernel/fpu/xstate.c342
-rw-r--r--arch/x86/kernel/fpu/xstate.h109
8 files changed, 454 insertions, 237 deletions
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
index f6d856bd50bc..10d0a720659c 100644
--- a/arch/x86/kernel/fpu/context.h
+++ b/arch/x86/kernel/fpu/context.h
@@ -53,7 +53,7 @@ static inline void fpregs_activate(struct fpu *fpu)
/* Internal helper for switch_fpu_return() and signal frame setup */
static inline void fpregs_restore_userregs(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
int cpu = smp_processor_id();
if (WARN_ON_ONCE(current->flags & (PF_KTHREAD | PF_USER_WORKER)))
@@ -67,7 +67,7 @@ static inline void fpregs_restore_userregs(void)
* If PKRU is enabled, then the PKRU value is already
* correct because it was either set in switch_to() or in
* flush_thread(). So it is excluded because it might be
- * not up to date in current->thread.fpu.xsave state.
+ * not up to date in current->thread.fpu->xsave state.
*
* XFD state is handled in restore_fpregs_from_fpstate().
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 520deb411a70..ea138583dd92 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -11,6 +11,7 @@
#include <asm/fpu/sched.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
+#include <asm/msr.h>
#include <asm/traps.h>
#include <asm/irq_regs.h>
@@ -43,14 +44,27 @@ struct fpu_state_config fpu_user_cfg __ro_after_init;
*/
struct fpstate init_fpstate __ro_after_init;
-/* Track in-kernel FPU usage */
-static DEFINE_PER_CPU(bool, in_kernel_fpu);
+/*
+ * Track FPU initialization and kernel-mode usage. 'true' means the FPU is
+ * initialized and is not currently being used by the kernel:
+ */
+DEFINE_PER_CPU(bool, kernel_fpu_allowed);
/*
* Track which context is using the FPU on the CPU:
*/
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+ if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
+ return NULL;
+
+ return (void *)task + sizeof(*task);
+}
+#endif
+
/*
* Can we use the FPU in kernel mode with the
* whole "kernel_fpu_begin/end()" sequence?
@@ -60,8 +74,18 @@ bool irq_fpu_usable(void)
if (WARN_ON_ONCE(in_nmi()))
return false;
- /* In kernel FPU usage already active? */
- if (this_cpu_read(in_kernel_fpu))
+ /*
+ * Return false in the following cases:
+ *
+ * - FPU is not yet initialized. This can happen only when the call is
+ * coming from CPU onlining, for example for microcode checksumming.
+ * - The kernel is already using the FPU, either because of explicit
+ * nesting (which should never be done), or because of implicit
+ * nesting when a hardirq interrupted a kernel-mode FPU section.
+ *
+ * The single boolean check below handles both cases:
+ */
+ if (!this_cpu_read(kernel_fpu_allowed))
return false;
/*
@@ -145,8 +169,8 @@ void restore_fpregs_from_fpstate(struct fpstate *fpstate, u64 mask)
asm volatile(
"fnclex\n\t"
"emms\n\t"
- "fildl %P[addr]" /* set F?P to defined value */
- : : [addr] "m" (fpstate));
+ "fildl %[addr]" /* set F?P to defined value */
+ : : [addr] "m" (*fpstate));
}
if (use_xsave()) {
@@ -195,7 +219,7 @@ void fpu_reset_from_exception_fixup(void)
#if IS_ENABLED(CONFIG_KVM)
static void __fpstate_reset(struct fpstate *fpstate, u64 xfd);
-static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
+static void fpu_lock_guest_permissions(void)
{
struct fpu_state_perm *fpuperm;
u64 perm;
@@ -204,15 +228,13 @@ static void fpu_init_guest_permissions(struct fpu_guest *gfpu)
return;
spin_lock_irq(&current->sighand->siglock);
- fpuperm = &current->group_leader->thread.fpu.guest_perm;
+ fpuperm = &x86_task_fpu(current->group_leader)->guest_perm;
perm = fpuperm->__state_perm;
/* First fpstate allocation locks down permissions. */
WRITE_ONCE(fpuperm->__state_perm, perm | FPU_GUEST_PERM_LOCKED);
spin_unlock_irq(&current->sighand->siglock);
-
- gfpu->perm = perm & ~FPU_GUEST_PERM_LOCKED;
}
bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
@@ -220,7 +242,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
struct fpstate *fpstate;
unsigned int size;
- size = fpu_user_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
+ size = fpu_kernel_cfg.default_size + ALIGN(offsetof(struct fpstate, regs), 64);
fpstate = vzalloc(size);
if (!fpstate)
return false;
@@ -232,8 +254,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
fpstate->is_guest = true;
gfpu->fpstate = fpstate;
- gfpu->xfeatures = fpu_user_cfg.default_features;
- gfpu->perm = fpu_user_cfg.default_features;
+ gfpu->xfeatures = fpu_kernel_cfg.default_features;
/*
* KVM sets the FP+SSE bits in the XSAVE header when copying FPU state
@@ -248,7 +269,7 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu)
if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size))
gfpu->uabi_size = fpu_user_cfg.default_size;
- fpu_init_guest_permissions(gfpu);
+ fpu_lock_guest_permissions();
return true;
}
@@ -256,16 +277,16 @@ EXPORT_SYMBOL_GPL(fpu_alloc_guest_fpstate);
void fpu_free_guest_fpstate(struct fpu_guest *gfpu)
{
- struct fpstate *fps = gfpu->fpstate;
+ struct fpstate *fpstate = gfpu->fpstate;
- if (!fps)
+ if (!fpstate)
return;
- if (WARN_ON_ONCE(!fps->is_valloc || !fps->is_guest || fps->in_use))
+ if (WARN_ON_ONCE(!fpstate->is_valloc || !fpstate->is_guest || fpstate->in_use))
return;
gfpu->fpstate = NULL;
- vfree(fps);
+ vfree(fpstate);
}
EXPORT_SYMBOL_GPL(fpu_free_guest_fpstate);
@@ -316,12 +337,12 @@ EXPORT_SYMBOL_GPL(fpu_update_guest_xfd);
*/
void fpu_sync_guest_vmexit_xfd_state(void)
{
- struct fpstate *fps = current->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(current)->fpstate;
lockdep_assert_irqs_disabled();
if (fpu_state_size_dynamic()) {
- rdmsrl(MSR_IA32_XFD, fps->xfd);
- __this_cpu_write(xfd_state, fps->xfd);
+ rdmsrq(MSR_IA32_XFD, fpstate->xfd);
+ __this_cpu_write(xfd_state, fpstate->xfd);
}
}
EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
@@ -330,7 +351,7 @@ EXPORT_SYMBOL_GPL(fpu_sync_guest_vmexit_xfd_state);
int fpu_swap_kvm_fpstate(struct fpu_guest *guest_fpu, bool enter_guest)
{
struct fpstate *guest_fps = guest_fpu->fpstate;
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
struct fpstate *cur_fps = fpu->fpstate;
fpregs_lock();
@@ -420,17 +441,19 @@ EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate);
void kernel_fpu_begin_mask(unsigned int kfpu_mask)
{
- preempt_disable();
+ if (!irqs_disabled())
+ fpregs_lock();
WARN_ON_FPU(!irq_fpu_usable());
- WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
- this_cpu_write(in_kernel_fpu, true);
+ /* Toggle kernel_fpu_allowed to false: */
+ WARN_ON_FPU(!this_cpu_read(kernel_fpu_allowed));
+ this_cpu_write(kernel_fpu_allowed, false);
if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER)) &&
!test_thread_flag(TIF_NEED_FPU_LOAD)) {
set_thread_flag(TIF_NEED_FPU_LOAD);
- save_fpregs_to_fpstate(&current->thread.fpu);
+ save_fpregs_to_fpstate(x86_task_fpu(current));
}
__cpu_invalidate_fpregs_state();
@@ -445,10 +468,12 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin_mask);
void kernel_fpu_end(void)
{
- WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+ /* Toggle kernel_fpu_allowed back to true: */
+ WARN_ON_FPU(this_cpu_read(kernel_fpu_allowed));
+ this_cpu_write(kernel_fpu_allowed, true);
- this_cpu_write(in_kernel_fpu, false);
- preempt_enable();
+ if (!irqs_disabled())
+ fpregs_unlock();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
@@ -458,7 +483,7 @@ EXPORT_SYMBOL_GPL(kernel_fpu_end);
*/
void fpu_sync_fpstate(struct fpu *fpu)
{
- WARN_ON_FPU(fpu != &current->thread.fpu);
+ WARN_ON_FPU(fpu != x86_task_fpu(current));
fpregs_lock();
trace_x86_fpu_before_save(fpu);
@@ -499,7 +524,7 @@ static inline void fpstate_init_fstate(struct fpstate *fpstate)
/*
* Used in two places:
* 1) Early boot to setup init_fpstate for non XSAVE systems
- * 2) fpu_init_fpstate_user() which is invoked from KVM
+ * 2) fpu_alloc_guest_fpstate() which is invoked from KVM
*/
void fpstate_init_user(struct fpstate *fpstate)
{
@@ -543,7 +568,7 @@ void fpstate_reset(struct fpu *fpu)
static inline void fpu_inherit_perms(struct fpu *dst_fpu)
{
if (fpu_state_size_dynamic()) {
- struct fpu *src_fpu = &current->group_leader->thread.fpu;
+ struct fpu *src_fpu = x86_task_fpu(current->group_leader);
spin_lock_irq(&current->sighand->siglock);
/* Fork also inherits the permissions of the parent */
@@ -563,7 +588,7 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
if (!ssp)
return 0;
- xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave,
+ xstate = get_xsave_addr(&x86_task_fpu(dst)->fpstate->regs.xsave,
XFEATURE_CET_USER);
/*
@@ -584,8 +609,16 @@ static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
unsigned long ssp)
{
- struct fpu *src_fpu = &current->thread.fpu;
- struct fpu *dst_fpu = &dst->thread.fpu;
+ /*
+ * We allocate the new FPU structure right after the end of the task struct.
+ * task allocation size already took this into account.
+ *
+ * This is safe because task_struct size is a multiple of cacheline size,
+ * thus x86_task_fpu() will always be cacheline aligned as well.
+ */
+ struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+
+ BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
/* The new task's FPU state cannot be valid in the hardware. */
dst_fpu->last_cpu = -1;
@@ -648,19 +681,22 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
if (update_fpu_shstk(dst, ssp))
return 1;
- trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
return 0;
}
/*
- * Whitelist the FPU register state embedded into task_struct for hardened
- * usercopy.
+ * While struct fpu is no longer part of struct thread_struct, it is still
+ * allocated after struct task_struct in the "task_struct" kmem cache. But
+ * since FPU is expected to be part of struct thread_struct, we have to
+ * adjust for it here.
*/
void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
{
- *offset = offsetof(struct thread_struct, fpu.__fpstate.regs);
+ /* The allocation follows struct task_struct. */
+ *offset = sizeof(struct task_struct) - offsetof(struct task_struct, thread);
+ *offset += offsetof(struct fpu, __fpstate.regs);
*size = fpu_kernel_cfg.default_size;
}
@@ -673,11 +709,18 @@ void fpu_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
* a state-restore is coming: either an explicit one,
* or a reschedule.
*/
-void fpu__drop(struct fpu *fpu)
+void fpu__drop(struct task_struct *tsk)
{
+ struct fpu *fpu;
+
+ if (test_tsk_thread_flag(tsk, TIF_NEED_FPU_LOAD))
+ return;
+
+ fpu = x86_task_fpu(tsk);
+
preempt_disable();
- if (fpu == &current->thread.fpu) {
+ if (fpu == x86_task_fpu(current)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
@@ -709,9 +752,9 @@ static inline void restore_fpregs_from_init_fpstate(u64 features_mask)
/*
* Reset current->fpu memory state to the init values.
*/
-static void fpu_reset_fpregs(void)
+static void fpu_reset_fpstate_regs(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
fpregs_lock();
__fpu_invalidate_fpregs_state(fpu);
@@ -740,11 +783,11 @@ static void fpu_reset_fpregs(void)
*/
void fpu__clear_user_states(struct fpu *fpu)
{
- WARN_ON_FPU(fpu != &current->thread.fpu);
+ WARN_ON_FPU(fpu != x86_task_fpu(current));
fpregs_lock();
if (!cpu_feature_enabled(X86_FEATURE_FPU)) {
- fpu_reset_fpregs();
+ fpu_reset_fpstate_regs();
fpregs_unlock();
return;
}
@@ -773,8 +816,8 @@ void fpu__clear_user_states(struct fpu *fpu)
void fpu_flush_thread(void)
{
- fpstate_reset(&current->thread.fpu);
- fpu_reset_fpregs();
+ fpstate_reset(x86_task_fpu(current));
+ fpu_reset_fpstate_regs();
}
/*
* Load FPU context before returning to userspace.
@@ -814,7 +857,7 @@ void fpregs_lock_and_load(void)
*/
void fpregs_assert_state_consistent(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
if (test_thread_flag(TIF_NEED_FPU_LOAD))
return;
@@ -826,7 +869,7 @@ EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
void fpregs_mark_activate(void)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
fpregs_activate(fpu);
fpu->last_cpu = smp_processor_id();
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 998a08f17e33..99db41bf9fa6 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void)
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
if (!boot_cpu_has(X86_FEATURE_FPU))
- fpstate_init_soft(&current->thread.fpu.fpstate->regs.soft);
+ ;
else
#endif
asm volatile ("fninit");
@@ -51,6 +51,9 @@ void fpu__init_cpu(void)
{
fpu__init_cpu_generic();
fpu__init_cpu_xstate();
+
+ /* Start allowing kernel-mode FPU: */
+ this_cpu_write(kernel_fpu_allowed, true);
}
static bool __init fpu__probe_without_cpuid(void)
@@ -73,6 +76,8 @@ static bool __init fpu__probe_without_cpuid(void)
static void __init fpu__init_system_early_generic(void)
{
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+
if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
if (fpu__probe_without_cpuid())
@@ -94,7 +99,6 @@ static void __init fpu__init_system_early_generic(void)
* Boot time FPU feature detection code:
*/
unsigned int mxcsr_feature_mask __ro_after_init = 0xffffffffu;
-EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
static void __init fpu__init_system_mxcsr(void)
{
@@ -150,11 +154,13 @@ static void __init fpu__init_task_struct_size(void)
{
int task_size = sizeof(struct task_struct);
+ task_size += sizeof(struct fpu);
+
/*
* Subtract off the static size of the register state.
* It potentially has a bunch of padding.
*/
- task_size -= sizeof(current->thread.fpu.__fpstate.regs);
+ task_size -= sizeof(union fpregs_state);
/*
* Add back the dynamically-calculated register state
@@ -164,14 +170,9 @@ static void __init fpu__init_task_struct_size(void)
/*
* We dynamically size 'struct fpu', so we require that
- * it be at the end of 'thread_struct' and that
- * 'thread_struct' be at the end of 'task_struct'. If
- * you hit a compile error here, check the structure to
- * see if something got added to the end.
+ * 'state' be at the end of 'it:
*/
CHECK_MEMBER_AT_END_OF(struct fpu, __fpstate);
- CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
- CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
arch_task_struct_size = task_size;
}
@@ -204,7 +205,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
fpu_kernel_cfg.default_size = size;
fpu_user_cfg.max_size = size;
fpu_user_cfg.default_size = size;
- fpstate_reset(&current->thread.fpu);
}
/*
@@ -213,7 +213,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
*/
void __init fpu__init_system(void)
{
- fpstate_reset(&current->thread.fpu);
fpu__init_system_early_generic();
/*
diff --git a/arch/x86/kernel/fpu/internal.h b/arch/x86/kernel/fpu/internal.h
index dbdb31f55fc7..975de070c9c9 100644
--- a/arch/x86/kernel/fpu/internal.h
+++ b/arch/x86/kernel/fpu/internal.h
@@ -18,7 +18,7 @@ static __always_inline __pure bool use_fxsr(void)
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
-# define WARN_ON_FPU(x) ({ (void)(x); 0; })
+# define WARN_ON_FPU(x) ({ BUILD_BUG_ON_INVALID(x); 0; })
#endif
/* Used in init.c */
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 6bc1eb2a21bd..0986c2200adc 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -45,7 +45,7 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
*/
static void sync_fpstate(struct fpu *fpu)
{
- if (fpu == &current->thread.fpu)
+ if (fpu == x86_task_fpu(current))
fpu_sync_fpstate(fpu);
}
@@ -63,7 +63,7 @@ static void fpu_force_restore(struct fpu *fpu)
* Only stopped child tasks can be used to modify the FPU
* state in the fpstate buffer:
*/
- WARN_ON_FPU(fpu == &current->thread.fpu);
+ WARN_ON_FPU(fpu == x86_task_fpu(current));
__fpu_invalidate_fpregs_state(fpu);
}
@@ -71,7 +71,7 @@ static void fpu_force_restore(struct fpu *fpu)
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
@@ -91,7 +91,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct fxregs_state newstate;
int ret;
@@ -133,7 +133,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
if (!cpu_feature_enabled(X86_FEATURE_XSAVE))
return -ENODEV;
- sync_fpstate(&target->thread.fpu);
+ sync_fpstate(x86_task_fpu(target));
copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_XSAVE);
return 0;
@@ -143,7 +143,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct xregs_state *tmpbuf = NULL;
int ret;
@@ -187,10 +187,11 @@ int ssp_active(struct task_struct *target, const struct user_regset *regset)
int ssp_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct cet_user_state *cetregs;
- if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK))
+ if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK) ||
+ !ssp_active(target, regset))
return -ENODEV;
sync_fpstate(fpu);
@@ -213,7 +214,7 @@ int ssp_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct xregs_state *xsave = &fpu->fpstate->regs.xsave;
struct cet_user_state *cetregs;
unsigned long user_ssp;
@@ -367,7 +368,7 @@ static void __convert_from_fxsr(struct user_i387_ia32_struct *env,
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
- __convert_from_fxsr(env, tsk, &tsk->thread.fpu.fpstate->regs.fxsave);
+ __convert_from_fxsr(env, tsk, &x86_task_fpu(tsk)->fpstate->regs.fxsave);
}
void convert_to_fxsr(struct fxregs_state *fxsave,
@@ -400,7 +401,7 @@ void convert_to_fxsr(struct fxregs_state *fxsave,
int fpregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct user_i387_ia32_struct env;
struct fxregs_state fxsave, *fx;
@@ -432,7 +433,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- struct fpu *fpu = &target->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(target);
struct user_i387_ia32_struct env;
int ret;
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 247f2225aa9f..c3ec2512f2bb 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -27,19 +27,14 @@
static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
struct _fpx_sw_bytes *fx_sw)
{
- int min_xstate_size = sizeof(struct fxregs_state) +
- sizeof(struct xstate_header);
void __user *fpstate = fxbuf;
unsigned int magic2;
if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw)))
return false;
- /* Check for the first magic field and other error scenarios. */
- if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
- fx_sw->xstate_size < min_xstate_size ||
- fx_sw->xstate_size > current->thread.fpu.fpstate->user_size ||
- fx_sw->xstate_size > fx_sw->extended_size)
+ /* Check for the first magic field */
+ if (fx_sw->magic1 != FP_XSTATE_MAGIC1)
goto setfx;
/*
@@ -48,13 +43,13 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
* fpstate layout with out copying the extended state information
* in the memory layout.
*/
- if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)))
+ if (__get_user(magic2, (__u32 __user *)(fpstate + x86_task_fpu(current)->fpstate->user_size)))
return false;
if (likely(magic2 == FP_XSTATE_MAGIC2))
return true;
setfx:
- trace_x86_fpu_xstate_check_failed(&current->thread.fpu);
+ trace_x86_fpu_xstate_check_failed(x86_task_fpu(current));
/* Set the parameters for fx only state */
fx_sw->magic1 = 0;
@@ -69,13 +64,13 @@ setfx:
static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf)
{
if (use_fxsr()) {
- struct xregs_state *xsave = &tsk->thread.fpu.fpstate->regs.xsave;
+ struct xregs_state *xsave = &x86_task_fpu(tsk)->fpstate->regs.xsave;
struct user_i387_ia32_struct env;
struct _fpstate_32 __user *fp = buf;
fpregs_lock();
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
- fxsave(&tsk->thread.fpu.fpstate->regs.fxsave);
+ fxsave(&x86_task_fpu(tsk)->fpstate->regs.fxsave);
fpregs_unlock();
convert_from_fxsr(&env, tsk);
@@ -119,7 +114,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
{
struct xregs_state __user *x = buf;
struct _fpx_sw_bytes sw_bytes = {};
- u32 xfeatures;
int err;
/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
@@ -133,12 +127,6 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
(__u32 __user *)(buf + fpstate->user_size));
/*
- * Read the xfeatures which we copied (directly from the cpu or
- * from the state in task struct) to the user buffers.
- */
- err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
-
- /*
* For legacy compatible, we always set FP/SSE bits in the bit
* vector while saving the state to the user context. This will
* enable us capturing any changes(during sigreturn) to
@@ -149,17 +137,16 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
- xfeatures |= XFEATURE_MASK_FPSSE;
-
- err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
+ err |= set_xfeature_in_sigframe(x, XFEATURE_MASK_FPSSE);
return !err;
}
-static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
+static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru)
{
if (use_xsave())
- return xsave_to_user_sigframe(buf);
+ return xsave_to_user_sigframe(buf, pkru);
+
if (use_fxsr())
return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
else
@@ -185,10 +172,10 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
* For [f]xsave state, update the SW reserved fields in the [f]xsave frame
* indicating the absence/presence of the extended state to the user.
*/
-bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
+bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru)
{
struct task_struct *tsk = current;
- struct fpstate *fpstate = tsk->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(tsk)->fpstate;
bool ia32_fxstate = (buf != buf_fx);
int ret;
@@ -228,7 +215,7 @@ retry:
fpregs_restore_userregs();
pagefault_disable();
- ret = copy_fpregs_to_sigframe(buf_fx);
+ ret = copy_fpregs_to_sigframe(buf_fx, pkru);
pagefault_enable();
fpregs_unlock();
@@ -276,7 +263,7 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
*/
static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
int ret;
/* Restore enabled features only. */
@@ -336,7 +323,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
bool ia32_fxstate)
{
struct task_struct *tsk = current;
- struct fpu *fpu = &tsk->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(tsk);
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
@@ -456,7 +443,7 @@ static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate)
*/
bool fpu__restore_sig(void __user *buf, int ia32_frame)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
void __user *buf_fx = buf;
bool ia32_fxstate = false;
bool success = false;
@@ -503,7 +490,7 @@ unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size)
{
- unsigned long frame_size = xstate_sigframe_size(current->thread.fpu.fpstate);
+ unsigned long frame_size = xstate_sigframe_size(x86_task_fpu(current)->fpstate);
*buf_fx = sp = round_down(sp - frame_size, 64);
if (ia32_frame && use_fxsr()) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 117e74c44e75..9aa9ac8399ae 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -13,16 +13,22 @@
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
+#include <linux/coredump.h>
+#include <linux/sort.h>
#include <asm/fpu/api.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/xcr.h>
+#include <asm/cpuid/api.h>
+#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/prctl.h>
#include <asm/elf.h>
+#include <uapi/asm/elf.h>
+
#include "context.h"
#include "internal.h"
#include "legacy.h"
@@ -58,6 +64,7 @@ static const char *xfeature_names[] =
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
+ "APX registers",
"unknown xstate feature",
};
@@ -76,6 +83,7 @@ static unsigned short xsave_cpuid_features[] __initdata = {
[XFEATURE_CET_USER] = X86_FEATURE_SHSTK,
[XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE,
[XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE,
+ [XFEATURE_APX] = X86_FEATURE_APX,
};
static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
@@ -84,6 +92,31 @@ static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
{ [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
+/*
+ * Ordering of xstate components in uncompacted format: The xfeature
+ * number does not necessarily indicate its position in the XSAVE buffer.
+ * This array defines the traversal order of xstate features.
+ */
+static unsigned int xfeature_uncompact_order[XFEATURE_MAX] __ro_after_init =
+ { [ 0 ... XFEATURE_MAX - 1] = -1};
+
+static inline unsigned int next_xfeature_order(unsigned int i, u64 mask)
+{
+ for (; xfeature_uncompact_order[i] != -1; i++) {
+ if (mask & BIT_ULL(xfeature_uncompact_order[i]))
+ break;
+ }
+
+ return i;
+}
+
+/* Iterate xstate features in uncompacted order: */
+#define for_each_extended_xfeature_in_order(i, mask) \
+ for (i = 0; \
+ i = next_xfeature_order(i, mask), \
+ xfeature_uncompact_order[i] != -1; \
+ i++)
+
#define XSTATE_FLAG_SUPERVISOR BIT(0)
#define XSTATE_FLAG_ALIGNED64 BIT(1)
@@ -178,10 +211,11 @@ void fpu__init_cpu_xstate(void)
* Must happen after CR4 setup and before xsetbv() to allow KVM
* lazy passthrough. Write independent of the dynamic state static
* key as that does not work on the boot CPU. This also ensures
- * that any stale state is wiped out from XFD.
+ * that any stale state is wiped out from XFD. Reset the per CPU
+ * xfd cache too.
*/
if (cpu_feature_enabled(X86_FEATURE_XFD))
- wrmsrl(MSR_IA32_XFD, init_fpstate.xfd);
+ xfd_set_state(init_fpstate.xfd);
/*
* XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features
@@ -194,7 +228,7 @@ void fpu__init_cpu_xstate(void)
* MSR_IA32_XSS sets supervisor states managed by XSAVES.
*/
if (boot_cpu_has(X86_FEATURE_XSAVES)) {
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
xfeatures_mask_independent());
}
}
@@ -204,16 +238,20 @@ static bool xfeature_enabled(enum xfeature xfeature)
return fpu_kernel_cfg.max_features & BIT_ULL(xfeature);
}
+static int compare_xstate_offsets(const void *xfeature1, const void *xfeature2)
+{
+ return xstate_offsets[*(unsigned int *)xfeature1] -
+ xstate_offsets[*(unsigned int *)xfeature2];
+}
+
/*
* Record the offsets and sizes of various xstates contained
- * in the XSAVE state memory layout.
+ * in the XSAVE state memory layout. Also, create an ordered
+ * list of xfeatures for handling out-of-order offsets.
*/
static void __init setup_xstate_cache(void)
{
- u32 eax, ebx, ecx, edx, i;
- /* start at the beginning of the "extended state" */
- unsigned int last_good_offset = offsetof(struct xregs_state,
- extended_state_area);
+ u32 eax, ebx, ecx, edx, xfeature, i = 0;
/*
* The FP xstates and SSE xstates are legacy states. They are always
* in the fixed offsets in the xsave area in either compacted form
@@ -227,39 +265,30 @@ static void __init setup_xstate_cache(void)
xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);
- for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
- cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ for_each_extended_xfeature(xfeature, fpu_kernel_cfg.max_features) {
+ cpuid_count(CPUID_LEAF_XSTATE, xfeature, &eax, &ebx, &ecx, &edx);
- xstate_sizes[i] = eax;
- xstate_flags[i] = ecx;
+ xstate_sizes[xfeature] = eax;
+ xstate_flags[xfeature] = ecx;
/*
* If an xfeature is supervisor state, the offset in EBX is
* invalid, leave it to -1.
*/
- if (xfeature_is_supervisor(i))
+ if (xfeature_is_supervisor(xfeature))
continue;
- xstate_offsets[i] = ebx;
-
- /*
- * In our xstate size checks, we assume that the highest-numbered
- * xstate feature has the highest offset in the buffer. Ensure
- * it does.
- */
- WARN_ONCE(last_good_offset > xstate_offsets[i],
- "x86/fpu: misordered xstate at %d\n", last_good_offset);
+ xstate_offsets[xfeature] = ebx;
- last_good_offset = xstate_offsets[i];
+ /* Populate the list of xfeatures before sorting */
+ xfeature_uncompact_order[i++] = xfeature;
}
-}
-
-static void __init print_xstate_feature(u64 xstate_mask)
-{
- const char *feature_name;
- if (cpu_has_xfeatures(xstate_mask, &feature_name))
- pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", xstate_mask, feature_name);
+ /*
+ * Sort xfeatures by their offsets to support out-of-order
+ * offsets in the uncompacted format.
+ */
+ sort(xfeature_uncompact_order, i, sizeof(unsigned int), compare_xstate_offsets, NULL);
}
/*
@@ -267,19 +296,15 @@ static void __init print_xstate_feature(u64 xstate_mask)
*/
static void __init print_xstate_features(void)
{
- print_xstate_feature(XFEATURE_MASK_FP);
- print_xstate_feature(XFEATURE_MASK_SSE);
- print_xstate_feature(XFEATURE_MASK_YMM);
- print_xstate_feature(XFEATURE_MASK_BNDREGS);
- print_xstate_feature(XFEATURE_MASK_BNDCSR);
- print_xstate_feature(XFEATURE_MASK_OPMASK);
- print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
- print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
- print_xstate_feature(XFEATURE_MASK_PKRU);
- print_xstate_feature(XFEATURE_MASK_PASID);
- print_xstate_feature(XFEATURE_MASK_CET_USER);
- print_xstate_feature(XFEATURE_MASK_XTILE_CFG);
- print_xstate_feature(XFEATURE_MASK_XTILE_DATA);
+ int i;
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ u64 mask = BIT_ULL(i);
+ const char *name;
+
+ if (cpu_has_xfeatures(mask, &name))
+ pr_info("x86/fpu: Supporting XSAVE feature 0x%03Lx: '%s'\n", mask, name);
+ }
}
/*
@@ -347,7 +372,8 @@ static __init void os_xrstor_booting(struct xregs_state *xstate)
XFEATURE_MASK_BNDCSR | \
XFEATURE_MASK_PASID | \
XFEATURE_MASK_CET_USER | \
- XFEATURE_MASK_XTILE)
+ XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_APX)
/*
* setup the xstate image representing the init state
@@ -394,7 +420,7 @@ int xfeature_size(int xfeature_nr)
u32 eax, ebx, ecx, edx;
CHECK_XFEATURE(xfeature_nr);
- cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
+ cpuid_count(CPUID_LEAF_XSTATE, xfeature_nr, &eax, &ebx, &ecx, &edx);
return eax;
}
@@ -437,9 +463,9 @@ static void __init __xstate_dump_leaves(void)
* just in case there are some goodies up there
*/
for (i = 0; i < XFEATURE_MAX + 10; i++) {
- cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
+ cpuid_count(CPUID_LEAF_XSTATE, i, &eax, &ebx, &ecx, &edx);
pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
- XSTATE_CPUID, i, eax, ebx, ecx, edx);
+ CPUID_LEAF_XSTATE, i, eax, ebx, ecx, edx);
}
}
@@ -480,7 +506,7 @@ static int __init check_xtile_data_against_struct(int size)
* Check the maximum palette id:
* eax: the highest numbered palette subleaf.
*/
- cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx);
+ cpuid_count(CPUID_LEAF_TILE, 0, &max_palid, &ebx, &ecx, &edx);
/*
* Cross-check each tile size and find the maximum number of
@@ -494,7 +520,7 @@ static int __init check_xtile_data_against_struct(int size)
* eax[31:16]: bytes per title
* ebx[31:16]: the max names (or max number of tiles)
*/
- cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx);
+ cpuid_count(CPUID_LEAF_TILE, palid, &eax, &ebx, &edx, &edx);
tile_size = eax >> 16;
max = ebx >> 16;
@@ -547,6 +573,7 @@ static bool __init check_xstate_against_struct(int nr)
case XFEATURE_PASID: return XCHECK_SZ(sz, nr, struct ia32_pasid_state);
case XFEATURE_XTILE_CFG: return XCHECK_SZ(sz, nr, struct xtile_cfg);
case XFEATURE_CET_USER: return XCHECK_SZ(sz, nr, struct cet_user_state);
+ case XFEATURE_APX: return XCHECK_SZ(sz, nr, struct apx_state);
case XFEATURE_XTILE_DATA: check_xtile_data_against_struct(sz); return true;
default:
XSTATE_WARN_ON(1, "No structure for xstate: %d\n", nr);
@@ -559,13 +586,20 @@ static bool __init check_xstate_against_struct(int nr)
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
{
unsigned int topmost = fls64(xfeatures) - 1;
- unsigned int offset = xstate_offsets[topmost];
+ unsigned int offset, i;
if (topmost <= XFEATURE_SSE)
return sizeof(struct xregs_state);
- if (compacted)
+ if (compacted) {
offset = xfeature_get_offset(xfeatures, topmost);
+ } else {
+ /* Walk through the xfeature order to pick the last */
+ for_each_extended_xfeature_in_order(i, xfeatures)
+ topmost = xfeature_uncompact_order[i];
+ offset = xstate_offsets[topmost];
+ }
+
return offset + xstate_sizes[topmost];
}
@@ -629,7 +663,7 @@ static unsigned int __init get_compacted_size(void)
* are no supervisor states, but XSAVEC still uses compacted
* format.
*/
- cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
return ebx;
}
@@ -646,7 +680,7 @@ static unsigned int __init get_xsave_compacted_size(void)
return get_compacted_size();
/* Disable independent features. */
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor());
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor());
/*
* Ask the hardware what size is required of the buffer.
@@ -655,7 +689,7 @@ static unsigned int __init get_xsave_compacted_size(void)
size = get_compacted_size();
/* Re-enable independent features so XSAVES will work on them again. */
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() | mask);
return size;
}
@@ -670,7 +704,7 @@ static unsigned int __init get_xsave_size_user(void)
* containing all the *user* state components
* corresponding to bits currently set in XCR0.
*/
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
return ebx;
}
@@ -718,6 +752,8 @@ static int __init init_xstate_size(void)
*/
static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
{
+ pr_info("x86/fpu: XSAVE disabled\n");
+
fpu_kernel_cfg.max_features = 0;
cr4_clear_bits(X86_CR4_OSXSAVE);
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
@@ -734,7 +770,7 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
*/
init_fpstate.xfd = 0;
- fpstate_reset(&current->thread.fpu);
+ fpstate_reset(x86_task_fpu(current));
}
/*
@@ -759,21 +795,16 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
return;
}
- if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
- WARN_ON_FPU(1);
- return;
- }
-
/*
* Find user xstates supported by the processor.
*/
- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+ cpuid_count(CPUID_LEAF_XSTATE, 0, &eax, &ebx, &ecx, &edx);
fpu_kernel_cfg.max_features = eax + ((u64)edx << 32);
/*
* Find supervisor xstates supported by the processor.
*/
- cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+ cpuid_count(CPUID_LEAF_XSTATE, 1, &eax, &ebx, &ecx, &edx);
fpu_kernel_cfg.max_features |= ecx + ((u64)edx << 32);
if ((fpu_kernel_cfg.max_features & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
@@ -787,6 +818,20 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
goto out_disable;
}
+ if (fpu_kernel_cfg.max_features & XFEATURE_MASK_APX &&
+ fpu_kernel_cfg.max_features & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)) {
+ /*
+ * This is a problematic CPU configuration where two
+ * conflicting state components are both enumerated.
+ */
+ pr_err("x86/fpu: Both APX/MPX present in the CPU's xstate features: 0x%llx.\n",
+ fpu_kernel_cfg.max_features);
+ goto out_disable;
+ }
+
+ fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features &
+ XFEATURE_MASK_INDEPENDENT;
+
/*
* Clear XSAVE features that are disabled in the normal CPUID.
*/
@@ -843,9 +888,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
if (err)
goto out_disable;
- /* Reset the state for the current task */
- fpstate_reset(&current->thread.fpu);
-
/*
* Update info used for ptrace frames; use standard-format size and no
* supervisor xstates:
@@ -861,7 +903,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
init_fpstate.xfeatures = fpu_kernel_cfg.default_features;
if (init_fpstate.size > sizeof(init_fpstate.regs)) {
- pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d), disabling XSAVE\n",
+ pr_warn("x86/fpu: init_fpstate buffer too small (%zu < %d)\n",
sizeof(init_fpstate.regs), init_fpstate.size);
goto out_disable;
}
@@ -873,7 +915,7 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
* xfeatures mask.
*/
if (xfeatures != fpu_kernel_cfg.max_features) {
- pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init, disabling XSAVE\n",
+ pr_err("x86/fpu: xfeatures modified from 0x%016llx to 0x%016llx during init\n",
xfeatures, fpu_kernel_cfg.max_features);
goto out_disable;
}
@@ -913,12 +955,12 @@ void fpu__resume_cpu(void)
* of XSAVES and MSR_IA32_XSS.
*/
if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
- wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor() |
+ wrmsrq(MSR_IA32_XSS, xfeatures_mask_supervisor() |
xfeatures_mask_independent());
}
if (fpu_state_size_dynamic())
- wrmsrl(MSR_IA32_XFD, current->thread.fpu.fpstate->xfd);
+ wrmsrq(MSR_IA32_XFD, x86_task_fpu(current)->fpstate->xfd);
}
/*
@@ -990,6 +1032,20 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
return __raw_xsave_addr(xsave, xfeature_nr);
}
+EXPORT_SYMBOL_GPL(get_xsave_addr);
+
+/*
+ * Given an xstate feature nr, calculate where in the xsave buffer the state is.
+ * The xsave buffer should be in standard format, not compacted (e.g. user mode
+ * signal frames).
+ */
+void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr)
+{
+ if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
+ return NULL;
+
+ return (void __user *)xsave + xstate_offsets[xfeature_nr];
+}
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -1066,10 +1122,9 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
const unsigned int off_mxcsr = offsetof(struct fxregs_state, mxcsr);
struct xregs_state *xinit = &init_fpstate.regs.xsave;
struct xregs_state *xsave = &fpstate->regs.xsave;
+ unsigned int zerofrom, i, xfeature;
struct xstate_header header;
- unsigned int zerofrom;
u64 mask;
- int i;
memset(&header, 0, sizeof(header));
header.xfeatures = xsave->header.xfeatures;
@@ -1138,15 +1193,16 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
*/
mask = header.xfeatures;
- for_each_extended_xfeature(i, mask) {
+ for_each_extended_xfeature_in_order(i, mask) {
+ xfeature = xfeature_uncompact_order[i];
/*
* If there was a feature or alignment gap, zero the space
* in the destination buffer.
*/
- if (zerofrom < xstate_offsets[i])
- membuf_zero(&to, xstate_offsets[i] - zerofrom);
+ if (zerofrom < xstate_offsets[xfeature])
+ membuf_zero(&to, xstate_offsets[xfeature] - zerofrom);
- if (i == XFEATURE_PKRU) {
+ if (xfeature == XFEATURE_PKRU) {
struct pkru_state pkru = {0};
/*
* PKRU is not necessarily up to date in the
@@ -1156,14 +1212,14 @@ void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate,
membuf_write(&to, &pkru, sizeof(pkru));
} else {
membuf_write(&to,
- __raw_xsave_addr(xsave, i),
- xstate_sizes[i]);
+ __raw_xsave_addr(xsave, xfeature),
+ xstate_sizes[xfeature]);
}
/*
* Keep track of the last copied state in the non-compacted
* target buffer for gap zeroing.
*/
- zerofrom = xstate_offsets[i] + xstate_sizes[i];
+ zerofrom = xstate_offsets[xfeature] + xstate_sizes[xfeature];
}
out:
@@ -1186,8 +1242,8 @@ out:
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode copy_mode)
{
- __copy_xstate_to_uabi_buf(to, tsk->thread.fpu.fpstate,
- tsk->thread.fpu.fpstate->user_xfeatures,
+ __copy_xstate_to_uabi_buf(to, x86_task_fpu(tsk)->fpstate,
+ x86_task_fpu(tsk)->fpstate->user_xfeatures,
tsk->thread.pkru, copy_mode);
}
@@ -1327,7 +1383,7 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u
int copy_sigframe_from_user_to_xstate(struct task_struct *tsk,
const void __user *ubuf)
{
- return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru);
+ return copy_uabi_to_xstate(x86_task_fpu(tsk)->fpstate, NULL, ubuf, &tsk->thread.pkru);
}
static bool validate_independent_components(u64 mask)
@@ -1393,9 +1449,9 @@ void xrstors(struct xregs_state *xstate, u64 mask)
}
#if IS_ENABLED(CONFIG_KVM)
-void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature)
+void fpstate_clear_xstate_component(struct fpstate *fpstate, unsigned int xfeature)
{
- void *addr = get_xsave_addr(&fps->regs.xsave, xfeature);
+ void *addr = get_xsave_addr(&fpstate->regs.xsave, xfeature);
if (addr)
memset(addr, 0, xstate_sizes[xfeature]);
@@ -1421,7 +1477,7 @@ static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
* The XFD MSR does not match fpstate->xfd. That's invalid when
* the passed in fpstate is current's fpstate.
*/
- if (fpstate->xfd == current->thread.fpu.fpstate->xfd)
+ if (fpstate->xfd == x86_task_fpu(current)->fpstate->xfd)
return false;
/*
@@ -1433,8 +1489,8 @@ static bool xstate_op_valid(struct fpstate *fpstate, u64 mask, bool rstor)
return rstor;
/*
- * XSAVE(S): clone(), fpu_swap_kvm_fpu()
- * XRSTORS(S): fpu_swap_kvm_fpu()
+ * XSAVE(S): clone(), fpu_swap_kvm_fpstate()
+ * XRSTORS(S): fpu_swap_kvm_fpstate()
*/
/*
@@ -1498,7 +1554,7 @@ void fpstate_free(struct fpu *fpu)
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
unsigned int usize, struct fpu_guest *guest_fpu)
{
- struct fpu *fpu = &current->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current);
struct fpstate *curfps, *newfps = NULL;
unsigned int fpsize;
bool in_use;
@@ -1591,7 +1647,7 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
* AVX512.
*/
bool compacted = cpu_feature_enabled(X86_FEATURE_XCOMPACTED);
- struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current->group_leader);
struct fpu_state_perm *perm;
unsigned int ksize, usize;
u64 mask;
@@ -1601,16 +1657,20 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
if ((permitted & requested) == requested)
return 0;
- /* Calculate the resulting kernel state size */
+ /*
+ * Calculate the resulting kernel state size. Note, @permitted also
+ * contains supervisor xfeatures even though supervisor are always
+ * permitted for kernel and guest FPUs, and never permitted for user
+ * FPUs.
+ */
mask = permitted | requested;
- /* Take supervisor states into account on the host */
- if (!guest)
- mask |= xfeatures_mask_supervisor();
ksize = xstate_calculate_size(mask, compacted);
- /* Calculate the resulting user state size */
- mask &= XFEATURE_MASK_USER_SUPPORTED;
- usize = xstate_calculate_size(mask, false);
+ /*
+ * Calculate the resulting user state size. Take care not to clobber
+ * the supervisor xfeatures in the new mask!
+ */
+ usize = xstate_calculate_size(mask & XFEATURE_MASK_USER_SUPPORTED, false);
if (!guest) {
ret = validate_sigaltstack(usize);
@@ -1694,7 +1754,7 @@ int __xfd_enable_feature(u64 xfd_err, struct fpu_guest *guest_fpu)
return -EPERM;
}
- fpu = &current->group_leader->thread.fpu;
+ fpu = x86_task_fpu(current->group_leader);
perm = guest_fpu ? &fpu->guest_perm : &fpu->perm;
ksize = perm->__state_size;
usize = perm->__user_state_size;
@@ -1799,7 +1859,7 @@ long fpu_xstate_prctl(int option, unsigned long arg2)
*/
static void avx512_status(struct seq_file *m, struct task_struct *task)
{
- unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+ unsigned long timestamp = READ_ONCE(x86_task_fpu(task)->avx512_timestamp);
long delta;
if (!timestamp) {
@@ -1836,3 +1896,89 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}
#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_COREDUMP
+static const char owner_name[] = "LINUX";
+
+/*
+ * Dump type, size, offset and flag values for every xfeature that is present.
+ */
+static int dump_xsave_layout_desc(struct coredump_params *cprm)
+{
+ int num_records = 0;
+ int i;
+
+ for_each_extended_xfeature(i, fpu_user_cfg.max_features) {
+ struct x86_xfeat_component xc = {
+ .type = i,
+ .size = xstate_sizes[i],
+ .offset = xstate_offsets[i],
+ /* reserved for future use */
+ .flags = 0,
+ };
+
+ if (!dump_emit(cprm, &xc, sizeof(xc)))
+ return 0;
+
+ num_records++;
+ }
+ return num_records;
+}
+
+static u32 get_xsave_desc_size(void)
+{
+ u32 cnt = 0;
+ u32 i;
+
+ for_each_extended_xfeature(i, fpu_user_cfg.max_features)
+ cnt++;
+
+ return cnt * (sizeof(struct x86_xfeat_component));
+}
+
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+ int num_records = 0;
+ struct elf_note en;
+
+ if (!fpu_user_cfg.max_features)
+ return 0;
+
+ en.n_namesz = sizeof(owner_name);
+ en.n_descsz = get_xsave_desc_size();
+ en.n_type = NT_X86_XSAVE_LAYOUT;
+
+ if (!dump_emit(cprm, &en, sizeof(en)))
+ return 1;
+ if (!dump_emit(cprm, owner_name, en.n_namesz))
+ return 1;
+ if (!dump_align(cprm, 4))
+ return 1;
+
+ num_records = dump_xsave_layout_desc(cprm);
+ if (!num_records)
+ return 1;
+
+ /* Total size should be equal to the number of records */
+ if ((sizeof(struct x86_xfeat_component) * num_records) != en.n_descsz)
+ return 1;
+
+ return 0;
+}
+
+int elf_coredump_extra_notes_size(void)
+{
+ int size;
+
+ if (!fpu_user_cfg.max_features)
+ return 0;
+
+ /* .note header */
+ size = sizeof(struct elf_note);
+ /* Name plus alignment to 4 bytes */
+ size += roundup(sizeof(owner_name), 4);
+ size += get_xsave_desc_size();
+
+ return size;
+}
+#endif /* CONFIG_COREDUMP */
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 3518fb26d06b..52ce19289989 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -5,6 +5,7 @@
#include <asm/cpufeature.h>
#include <asm/fpu/xstate.h>
#include <asm/fpu/xcr.h>
+#include <asm/msr.h>
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u64, xfd_state);
@@ -22,7 +23,7 @@ static inline void xstate_init_xcomp_bv(struct xregs_state *xsave, u64 mask)
static inline u64 xstate_get_group_perm(bool guest)
{
- struct fpu *fpu = &current->group_leader->thread.fpu;
+ struct fpu *fpu = x86_task_fpu(current->group_leader);
struct fpu_state_perm *perm;
/* Pairs with WRITE_ONCE() in xstate_request_perm() */
@@ -54,7 +55,7 @@ extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system_xstate(unsigned int legacy_size);
-extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+extern void __user *get_xsave_addr_user(struct xregs_state __user *xsave, int xfeature_nr);
static inline u64 xfeatures_mask_supervisor(void)
{
@@ -64,38 +65,73 @@ static inline u64 xfeatures_mask_supervisor(void)
static inline u64 xfeatures_mask_independent(void)
{
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
- return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
+ return fpu_kernel_cfg.independent_features & ~XFEATURE_MASK_LBR;
- return XFEATURE_MASK_INDEPENDENT;
+ return fpu_kernel_cfg.independent_features;
+}
+
+static inline int set_xfeature_in_sigframe(struct xregs_state __user *xbuf, u64 mask)
+{
+ u64 xfeatures;
+ int err;
+
+ /* Read the xfeatures value already saved in the user buffer */
+ err = __get_user(xfeatures, &xbuf->header.xfeatures);
+ xfeatures |= mask;
+ err |= __put_user(xfeatures, &xbuf->header.xfeatures);
+
+ return err;
+}
+
+/*
+ * Update the value of PKRU register that was already pushed onto the signal frame.
+ */
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
+{
+ int err;
+
+ if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
+ return 0;
+
+ /* Mark PKRU as in-use so that it is restored correctly. */
+ err = set_xfeature_in_sigframe(buf, XFEATURE_MASK_PKRU);
+ if (err)
+ return err;
+
+ /* Update PKRU value in the userspace xsave buffer. */
+ return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
}
/* XSAVE/XRSTOR wrapper functions */
#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
+#define REX_SUFFIX "64"
#else
-#define REX_PREFIX
+#define REX_SUFFIX
#endif
-/* These macros all use (%edi)/(%rdi) as the single memory argument. */
-#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
-#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
-#define XSAVEC ".byte " REX_PREFIX "0x0f,0xc7,0x27"
-#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
-#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
+#define XSAVE "xsave" REX_SUFFIX " %[xa]"
+#define XSAVEOPT "xsaveopt" REX_SUFFIX " %[xa]"
+#define XSAVEC "xsavec" REX_SUFFIX " %[xa]"
+#define XSAVES "xsaves" REX_SUFFIX " %[xa]"
+#define XRSTOR "xrstor" REX_SUFFIX " %[xa]"
+#define XRSTORS "xrstors" REX_SUFFIX " %[xa]"
/*
* After this @err contains 0 on success or the trap number when the
* operation raises an exception.
+ *
+ * The [xa] input parameter below represents the struct xregs_state pointer
+ * and the asm symbolic name for the argument used in the XSAVE/XRSTOR insns
+ * above.
*/
#define XSTATE_OP(op, st, lmask, hmask, err) \
asm volatile("1:" op "\n\t" \
"xor %[err], %[err]\n" \
- "2:\n\t" \
+ "2:\n" \
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_FAULT_MCE_SAFE) \
: [err] "=a" (err) \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : [xa] "m" (*(st)), "a" (lmask), "d" (hmask) \
: "memory")
/*
@@ -108,23 +144,19 @@ static inline u64 xfeatures_mask_independent(void)
* Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
* supports modified optimization which is not supported by XSAVE.
*
- * We use XSAVE as a fallback.
- *
- * The 661 label is defined in the ALTERNATIVE* macros as the address of the
- * original instruction which gets replaced. We need to use it here as the
- * address of the instruction where we might get an exception at.
+ * Use XSAVE as a fallback.
*/
#define XSTATE_XSAVE(st, lmask, hmask, err) \
- asm volatile(ALTERNATIVE_3(XSAVE, \
+ asm volatile("1: " ALTERNATIVE_3(XSAVE, \
XSAVEOPT, X86_FEATURE_XSAVEOPT, \
XSAVEC, X86_FEATURE_XSAVEC, \
XSAVES, X86_FEATURE_XSAVES) \
- "\n" \
+ "\n\t" \
"xor %[err], %[err]\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE_REG(661b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
+ _ASM_EXTABLE_TYPE_REG(1b, 3b, EX_TYPE_EFAULT_REG, %[err]) \
: [err] "=r" (err) \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : [xa] "m" (*(st)), "a" (lmask), "d" (hmask) \
: "memory")
/*
@@ -132,13 +164,13 @@ static inline u64 xfeatures_mask_independent(void)
* XSAVE area format.
*/
#define XSTATE_XRESTORE(st, lmask, hmask) \
- asm volatile(ALTERNATIVE(XRSTOR, \
+ asm volatile("1: " ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
"3:\n" \
- _ASM_EXTABLE_TYPE(661b, 3b, EX_TYPE_FPU_RESTORE) \
+ _ASM_EXTABLE_TYPE(1b, 3b, EX_TYPE_FPU_RESTORE) \
: \
- : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : [xa] "m" (*(st)), "a" (lmask), "d" (hmask) \
: "memory")
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_DEBUG_FPU)
@@ -148,20 +180,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs
#endif
#ifdef CONFIG_X86_64
+static inline void xfd_set_state(u64 xfd)
+{
+ wrmsrq(MSR_IA32_XFD, xfd);
+ __this_cpu_write(xfd_state, xfd);
+}
+
static inline void xfd_update_state(struct fpstate *fpstate)
{
if (fpu_state_size_dynamic()) {
u64 xfd = fpstate->xfd;
- if (__this_cpu_read(xfd_state) != xfd) {
- wrmsrl(MSR_IA32_XFD, xfd);
- __this_cpu_write(xfd_state, xfd);
- }
+ if (__this_cpu_read(xfd_state) != xfd)
+ xfd_set_state(xfd);
}
}
extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu);
#else
+static inline void xfd_set_state(u64 xfd) { }
+
static inline void xfd_update_state(struct fpstate *fpstate) { }
static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) {
@@ -254,14 +292,14 @@ static inline u64 xfeatures_need_sigframe_write(void)
* The caller has to zero buf::header before calling this because XSAVE*
* does not touch the reserved fields in the header.
*/
-static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
+static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkru)
{
/*
* Include the features which are not xsaved/rstored by the kernel
* internally, e.g. PKRU. That's user space ABI and also required
* to allow the signal handler to modify PKRU.
*/
- struct fpstate *fpstate = current->thread.fpu.fpstate;
+ struct fpstate *fpstate = x86_task_fpu(current)->fpstate;
u64 mask = fpstate->user_xfeatures;
u32 lmask;
u32 hmask;
@@ -279,6 +317,9 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
XSTATE_OP(XSAVE, buf, lmask, hmask, err);
clac();
+ if (!err)
+ err = update_pkru_in_sigframe(buf, pkru);
+
return err;
}
@@ -292,7 +333,7 @@ static inline int xrstor_from_user_sigframe(struct xregs_state __user *buf, u64
u32 hmask = mask >> 32;
int err;
- xfd_validate_state(current->thread.fpu.fpstate, mask, true);
+ xfd_validate_state(x86_task_fpu(current)->fpstate, mask, true);
stac();
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);