From 0a7815515471a80379bfefc9f1913e0d8c87fbfb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 10:58:52 +0200 Subject: x86/fpu: Rename unlazy_fpu() to fpu__save() This function is a misnomer on two levels: 1) it doesn't really manipulate TS on modern CPUs anymore, its primary purpose is to save FPU state, used: - when executing fork()/clone(): to copy current FPU state to the child's FPU state. - when handling math exceptions: to generate the math error si_code in the signal frame. 2) even on legacy CPUs it doesn't actually 'unlazy', if then it lazies the FPU state: as a side effect of the old FNSAVE instruction which clears (destroys) FPU state it's necessary to set CR0::TS. So rename it to fpu__save() to better reflect its purpose. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 6eb6fcb83f63..d4419da9b210 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -101,7 +101,7 @@ static inline int user_has_fpu(void) return current->thread.fpu.has_fpu; } -extern void unlazy_fpu(struct task_struct *tsk); +extern void fpu__save(struct task_struct *tsk); #endif /* __ASSEMBLY__ */ -- cgit v1.2.3-59-g8ed1b From 97185c95f7ab7f752473c34672dab0925758094b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:02:02 +0200 Subject: x86/fpu: Split an fpstate_alloc_init() function out of init_fpu() Most init_fpu() users don't want the register-saving aspect of the function, they are calling it for 'current' and when FPU registers are not allocated and initialized yet. 
Split out a simplified API that does just that (and add debug-checks for these conditions): fpstate_alloc_init(). Use it where appropriate. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 3 +++ arch/x86/kernel/i387.c | 31 +++++++++++++++++++++++++++++++ arch/x86/kernel/process.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/kernel/xsave.c | 2 +- arch/x86/kvm/x86.c | 2 +- arch/x86/math-emu/fpu_entry.c | 2 +- 7 files changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index d4419da9b210..1a896b4533c4 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -18,7 +18,10 @@ struct pt_regs; struct user_i387_struct; +extern int fpstate_alloc_init(struct task_struct *curr); + extern int init_fpu(struct task_struct *child); + extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 29251f5668b1..56b6e726fb60 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -246,6 +246,37 @@ void fpu_finit(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpu_finit); +/* + * Allocate the backing store for the current task's FPU registers + * and initialize the registers themselves as well. + * + * Can fail. + */ +int fpstate_alloc_init(struct task_struct *curr) +{ + int ret; + + if (WARN_ON_ONCE(curr != current)) + return -EINVAL; + if (WARN_ON_ONCE(curr->flags & PF_USED_MATH)) + return -EINVAL; + + /* + * Memory allocation at the first usage of the FPU and other state. 
+ */ + ret = fpu_alloc(&curr->thread.fpu); + if (ret) + return ret; + + fpu_finit(&curr->thread.fpu); + + /* Safe to do for the current task: */ + curr->flags |= PF_USED_MATH; + + return 0; +} +EXPORT_SYMBOL_GPL(fpstate_alloc_init); + /* * The _current_ task is using the FPU for the first time * so initialize it and set the mxcsr to its default diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6e338e3b1dc0..abdb81d07423 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -159,7 +159,7 @@ void flush_thread(void) } else { if (!tsk_used_math(tsk)) { /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(init_fpu(tsk))) + if (WARN_ON(fpstate_alloc_init(tsk))) force_sig(SIGKILL, tsk); user_fpu_begin(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 12f29f9907cd..cf9c9627be19 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -846,7 +846,7 @@ void math_state_restore(void) /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + if (fpstate_alloc_init(tsk)) { /* * ran out of memory! 
*/ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 87a815b85f3e..a977cdd03825 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -349,7 +349,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!used_math() && init_fpu(tsk)) + if (!used_math() && fpstate_alloc_init(tsk)) return -1; if (!static_cpu_has(X86_FEATURE_FPU)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c73efcd03e29..bfc396632ee8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6600,7 +6600,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - if (!tsk_used_math(current) && init_fpu(current)) + if (!tsk_used_math(current) && fpstate_alloc_init(current)) return -ENOMEM; if (vcpu->sigset_active) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b868124128d..c9ff09a02385 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -149,7 +149,7 @@ void math_emulate(struct math_emu_info *info) struct desc_struct code_descriptor; if (!used_math()) { - if (init_fpu(current)) { + if (fpstate_alloc_init(current)) { do_group_exit(SIGKILL); return; } -- cgit v1.2.3-59-g8ed1b From bda283796b38baae3ec5c8c788b143b1fb9dcc77 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:17:28 +0200 Subject: x86/fpu: Make init_fpu() static Now that the allocation users have been split off into a separate function, init_fpu() has become local to i387.c: make it static. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 2 -- arch/x86/kernel/i387.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 1a896b4533c4..0367d17371f5 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -20,8 +20,6 @@ struct user_i387_struct; extern int fpstate_alloc_init(struct task_struct *curr); -extern int init_fpu(struct task_struct *child); - extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 56b6e726fb60..95079026c386 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -283,7 +283,7 @@ EXPORT_SYMBOL_GPL(fpstate_alloc_init); * value at reset if we support XMM instructions and then * remember the current task has used the FPU. */ -int init_fpu(struct task_struct *tsk) +static int init_fpu(struct task_struct *tsk) { int ret; @@ -306,7 +306,6 @@ int init_fpu(struct task_struct *tsk) set_stopped_child_used_math(tsk); return 0; } -EXPORT_SYMBOL_GPL(init_fpu); /* * The xstateregs_active() routine is the same as the fpregs_active() routine, -- cgit v1.2.3-59-g8ed1b From c0ee2cf61be0bd3db2a30d76056a2e09fa48272e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 13:01:52 +0200 Subject: x86/fpu: Rename fpu_finit() to fpstate_init() Make it clear that we are initializing the in-memory FPU context area, not the FPU registers. Also move it to the fpu__*() namespace. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 2 +- arch/x86/kernel/i387.c | 8 ++++---- arch/x86/kernel/xsave.c | 2 +- arch/x86/kvm/x86.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0367d17371f5..6552a16e0e38 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -19,8 +19,8 @@ struct pt_regs; struct user_i387_struct; extern int fpstate_alloc_init(struct task_struct *curr); +extern void fpstate_init(struct fpu *fpu); -extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 5f2feb63b72a..e0c16e86deb0 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -225,7 +225,7 @@ void fpu_init(void) eager_fpu_init(); } -void fpu_finit(struct fpu *fpu) +void fpstate_init(struct fpu *fpu) { if (!cpu_has_fpu) { finit_soft_fpu(&fpu->state->soft); @@ -244,7 +244,7 @@ void fpu_finit(struct fpu *fpu) fp->fos = 0xffff0000u; } } -EXPORT_SYMBOL_GPL(fpu_finit); +EXPORT_SYMBOL_GPL(fpstate_init); int fpstate_alloc(struct fpu *fpu) { @@ -284,7 +284,7 @@ int fpstate_alloc_init(struct task_struct *curr) if (ret) return ret; - fpu_finit(&curr->thread.fpu); + fpstate_init(&curr->thread.fpu); /* Safe to do for the current task: */ curr->flags |= PF_USED_MATH; @@ -318,7 +318,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) if (ret) return ret; - fpu_finit(&child->thread.fpu); + fpstate_init(&child->thread.fpu); /* Safe to do for stopped child tasks: */ child->flags |= PF_USED_MATH; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a977cdd03825..163b5cc582ef 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -395,7 +395,7 @@ 
int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { - fpu_finit(fpu); + fpstate_init(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 80a411c83083..26b1f89fc608 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7011,7 +7011,7 @@ int fx_init(struct kvm_vcpu *vcpu) if (err) return err; - fpu_finit(&vcpu->arch.guest_fpu); + fpstate_init(&vcpu->arch.guest_fpu); if (cpu_has_xsaves) vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; -- cgit v1.2.3-59-g8ed1b From 81683cc8277e79decff4d0cf82ae0e17d2fe465f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 11:52:13 +0200 Subject: x86/fpu: Factor out fpu__flush_thread() from flush_thread() flush_thread() open codes a lot of FPU internals - create a separate function for it in fpu/core.c. Turns out that this does not hurt performance: text data bss dec hex filename 11843039 1884440 1130496 14857975 e2b6f7 vmlinux.before 11843039 1884440 1130496 14857975 e2b6f7 vmlinux.after and since this is a slowpath clarity comes first anyway. We can reconsider inlining decisions after the FPU code has been cleaned up. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 1 + arch/x86/kernel/fpu/core.c | 17 +++++++++++++++++ arch/x86/kernel/process.c | 14 +------------- 3 files changed, 19 insertions(+), 13 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 6552a16e0e38..d6fc84440b73 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -20,6 +20,7 @@ struct user_i387_struct; extern int fpstate_alloc_init(struct task_struct *curr); extern void fpstate_init(struct fpu *fpu); +extern void fpu__flush_thread(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9211582f5d3f..787bf57b8422 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,6 +227,23 @@ static int fpu__unlazy_stopped(struct task_struct *child) return 0; } +void fpu__flush_thread(struct task_struct *tsk) +{ + if (!use_eager_fpu()) { + /* FPU state will be reallocated lazily at the first use. */ + drop_fpu(tsk); + fpstate_free(&tsk->thread.fpu); + } else { + if (!tsk_used_math(tsk)) { + /* kthread execs. TODO: cleanup this horror. 
*/ + if (WARN_ON(fpstate_alloc_init(tsk))) + force_sig(SIGKILL, tsk); + user_fpu_begin(); + } + restore_init_xstate(); + } +} + /* * The xstateregs_active() routine is the same as the fpregs_active() routine, * as the "regset->n" for the xstate regset will be updated based on the feature diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6ab180f40a7e..52fd8f6f44c7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -146,19 +146,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - if (!use_eager_fpu()) { - /* FPU state will be reallocated lazily at the first use. */ - drop_fpu(tsk); - fpstate_free(&tsk->thread.fpu); - } else { - if (!tsk_used_math(tsk)) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(fpstate_alloc_init(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); - } - restore_init_xstate(); - } + fpu__flush_thread(tsk); } static void hard_disable_TSC(void) -- cgit v1.2.3-59-g8ed1b From 3a0aee4801d475b64a408539c01ec0d17d52192b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 13:16:47 +0200 Subject: x86/fpu: Rename math_state_restore() to fpu__restore() Move to the new fpu__*() namespace. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- Documentation/preempt-locking.txt | 2 +- arch/x86/include/asm/i387.h | 2 +- arch/x86/kernel/fpu/core.c | 6 +++--- arch/x86/kernel/fpu/xsave.c | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/traps.c | 2 +- drivers/lguest/x86/core.c | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/Documentation/preempt-locking.txt b/Documentation/preempt-locking.txt index 57883ca2498b..e89ce6624af2 100644 --- a/Documentation/preempt-locking.txt +++ b/Documentation/preempt-locking.txt @@ -48,7 +48,7 @@ preemption must be disabled around such regions. Note, some FPU functions are already explicitly preempt safe. For example, kernel_fpu_begin and kernel_fpu_end will disable and enable preemption. -However, math_state_restore must be called with preemption disabled. +However, fpu__restore() must be called with preemption disabled. 
RULE #3: Lock acquire and release must be performed by same task diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index d6fc84440b73..c8ee395dd6c6 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -23,7 +23,7 @@ extern void fpstate_init(struct fpu *fpu); extern void fpu__flush_thread(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern void math_state_restore(void); +extern void fpu__restore(void); extern bool irq_fpu_usable(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7add2fb7369e..15c3cf7bd160 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -228,7 +228,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) } /* - * 'math_state_restore()' saves the current math information in the + * 'fpu__restore()' saves the current math information in the * old math state array, and gets the new ones from the current task * * Careful.. There are problems with IBM-designed IRQ13 behaviour. @@ -237,7 +237,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) * Must be called with kernel preemption disabled (eg with local * local interrupts as in the case of do_device_not_available). 
*/ -void math_state_restore(void) +void fpu__restore(void) { struct task_struct *tsk = current; @@ -267,7 +267,7 @@ void math_state_restore(void) } kernel_fpu_enable(); } -EXPORT_SYMBOL_GPL(math_state_restore); +EXPORT_SYMBOL_GPL(fpu__restore); void fpu__flush_thread(struct task_struct *tsk) { diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 163b5cc582ef..d913d5024901 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -404,7 +404,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); if (use_eager_fpu()) { preempt_disable(); - math_state_restore(); + fpu__restore(); preempt_enable(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 84d647d4b14d..1a0edce626b2 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -295,7 +295,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up + * done before fpu__restore(), so the TS bit is up * to date. */ arch_end_context_switch(next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ae6efeccb46e..99cc4b8589ad 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -298,7 +298,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before * loading segments that might reference them, and and it must - * be done before math_state_restore, so the TS bit is up to + * be done before fpu__restore(), so the TS bit is up to * date. 
*/ arch_end_context_switch(next_p); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 63c7fc3677b4..22ad90a40dbf 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -846,7 +846,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) return; } #endif - math_state_restore(); /* interrupts still off */ + fpu__restore(); /* interrupts still off */ #ifdef CONFIG_X86_32 conditional_sti(regs); #endif diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 30f2aef69d78..bcb534a5512d 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -297,12 +297,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) /* * Similarly, if we took a trap because the Guest used the FPU, * we have to restore the FPU it expects to see. - * math_state_restore() may sleep and we may even move off to + * fpu__restore() may sleep and we may even move off to * a different CPU. So all the critical stuff should be done * before this. */ else if (cpu->regs->trapnum == 7 && !user_has_fpu()) - math_state_restore(); + fpu__restore(); } /*H:130 -- cgit v1.2.3-59-g8ed1b From 4d1640927bd54aa118f91c2bcfe6c2de0e2ba2a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 13:44:25 +0200 Subject: x86/fpu: Factor out the FPU bug detection code into fpu__init_check_bugs() Move the boot-time FPU bug detection code to the other FPU boot time init code in fpu/init.c. No change in code size: text data bss dec hex filename 13044568 1884440 1130496 16059504 f50c70 vmlinux.before 13044568 1884440 1130496 16059504 f50c70 vmlinux.after Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 1 + arch/x86/kernel/cpu/bugs.c | 53 +------------------------------------- arch/x86/kernel/fpu/init.c | 63 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 52 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c8ee395dd6c6..89ae3e051741 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -24,6 +24,7 @@ extern void fpu__flush_thread(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void fpu__restore(void); +extern void fpu__init_check_bugs(void); extern bool irq_fpu_usable(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03445346ee0a..eb8be0c5823b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -17,52 +17,6 @@ #include #include -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - s32 fdiv_bug; - - kernel_fpu_begin(); - - /* - * trap_init() enabled FXSR and company _before_ testing for FP - * problems here. 
- * - * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug - */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&fdiv_bug) - : "m" (*&x), "m" (*&y)); - - kernel_fpu_end(); - - if (fdiv_bug) { - set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); - pr_warn("Hmm, FPU with FDIV bug\n"); - } -} - void __init check_bugs(void) { identify_boot_cpu(); @@ -85,10 +39,5 @@ void __init check_bugs(void) '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); - /* - * kernel_fpu_begin/end() in check_fpu() relies on the patched - * alternative instructions. - */ - if (cpu_has_fpu) - check_fpu(); + fpu__init_check_bugs(); } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 5e06aa6cc22e..4eabb426e910 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -4,6 +4,69 @@ #include #include +/* + * Boot time CPU/FPU FDIV bug detection code: + */ + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + s32 fdiv_bug; + + kernel_fpu_begin(); + + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. 
+ * + * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug + */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&fdiv_bug) + : "m" (*&x), "m" (*&y)); + + kernel_fpu_end(); + + if (fdiv_bug) { + set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); + pr_warn("Hmm, FPU with FDIV bug\n"); + } +} + +void fpu__init_check_bugs(void) +{ + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. + */ + if (cpu_has_fpu) + check_fpu(); +} + +/* + * Boot time FPU feature detection code: + */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -- cgit v1.2.3-59-g8ed1b From 416d49ac67ae3af8c98ecee2ebe0a883b95e213a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 16:33:08 +0200 Subject: x86/fpu: Make kernel_fpu_disable/enable() static This allows the compiler to inline them and to eliminate them: arch/x86/kernel/fpu/core.o: text data bss dec hex filename 6741 4 8 6753 1a61 core.o.before 6716 4 8 6728 1a48 core.o.after Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 4 ---- arch/x86/kernel/fpu/core.c | 8 ++++---- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 89ae3e051741..e69989f95da5 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -54,10 +54,6 @@ static inline void kernel_fpu_end(void) preempt_enable(); } -/* Must be called with preempt disabled */ -extern void kernel_fpu_disable(void); -extern void kernel_fpu_enable(void); - /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. And these instructions diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d0fcf741f70b..161820526ad3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,13 +9,13 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); -void kernel_fpu_disable(void) +static void kernel_fpu_disable(void) { WARN_ON(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); } -void kernel_fpu_enable(void) +static void kernel_fpu_enable(void) { this_cpu_write(in_kernel_fpu, false); } @@ -32,7 +32,7 @@ void kernel_fpu_enable(void) * Except for the eagerfpu case when we return true; in the likely case * the thread has FPU but we are not going to set/clear TS. */ -static inline bool interrupted_kernel_fpu_idle(void) +static bool interrupted_kernel_fpu_idle(void) { if (this_cpu_read(in_kernel_fpu)) return false; @@ -52,7 +52,7 @@ static inline bool interrupted_kernel_fpu_idle(void) * in an interrupt context from user mode - we'll just * save the FPU state as required. 
*/ -static inline bool interrupted_user_mode(void) +static bool interrupted_user_mode(void) { struct pt_regs *regs = get_irq_regs(); return regs && user_mode(regs); -- cgit v1.2.3-59-g8ed1b From 0c070595ceccb391100127a28ff837c50356ad67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:57:24 +0200 Subject: x86/fpu: Use 'struct fpu' in fpu__save() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 2 +- arch/x86/kernel/fpu/core.c | 8 +++----- arch/x86/kernel/traps.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index e69989f95da5..e3b42c5379bc 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -100,7 +100,7 @@ static inline int user_has_fpu(void) return current->thread.fpu.has_fpu; } -extern void fpu__save(struct task_struct *tsk); +extern void fpu__save(struct fpu *fpu); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7c0530082253..b685e9e90491 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -142,11 +142,9 @@ static void __save_fpu(struct fpu *fpu) * * This only ever gets called for the current task. 
*/ -void fpu__save(struct task_struct *tsk) +void fpu__save(struct fpu *fpu) { - struct fpu *fpu = &tsk->thread.fpu; - - WARN_ON(tsk != current); + WARN_ON(fpu != &current->thread.fpu); preempt_disable(); if (fpu->has_fpu) { @@ -240,7 +238,7 @@ static void fpu_copy(struct task_struct *dst, struct task_struct *src) memset(&dst->thread.fpu.state->xsave, 0, xstate_size); __save_fpu(dst_fpu); } else { - fpu__save(src); + fpu__save(src_fpu); memcpy(dst_fpu->state, src_fpu->state, xstate_size); } } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 22ad90a40dbf..8abcd6a6f3dc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -730,7 +730,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) /* * Save the info for the exception handler and clear the error. */ - fpu__save(task); + fpu__save(&task->thread.fpu); task->thread.trap_nr = trapnr; task->thread.error_code = error_code; info.si_signo = SIGFPE; -- cgit v1.2.3-59-g8ed1b From db2b1d3ad1cdae9f268d6db54b6127b09933da3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:13:09 +0200 Subject: x86/fpu: Use 'struct fpu' in fpstate_alloc_init() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 2 +- arch/x86/kernel/fpu/core.c | 13 ++++++------- arch/x86/kernel/fpu/xsave.c | 2 +- arch/x86/kvm/x86.c | 2 +- arch/x86/math-emu/fpu_entry.c | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index e3b42c5379bc..38376cdf297c 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -18,7 +18,7 @@ struct pt_regs; struct user_i387_struct; -extern int fpstate_alloc_init(struct task_struct *curr); +extern int fpstate_alloc_init(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); extern void fpu__flush_thread(struct task_struct *tsk); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a84358575235..183e69dfd4d0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -263,12 +263,11 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * * Can fail. */ -int fpstate_alloc_init(struct task_struct *curr) +int fpstate_alloc_init(struct fpu *fpu) { - struct fpu *fpu = &curr->thread.fpu; int ret; - if (WARN_ON_ONCE(curr != current)) + if (WARN_ON_ONCE(fpu != &current->thread.fpu)) return -EINVAL; if (WARN_ON_ONCE(fpu->fpstate_active)) return -EINVAL; @@ -276,11 +275,11 @@ int fpstate_alloc_init(struct task_struct *curr) /* * Memory allocation at the first usage of the FPU and other state. */ - ret = fpstate_alloc(&curr->thread.fpu); + ret = fpstate_alloc(fpu); if (ret) return ret; - fpstate_init(&curr->thread.fpu); + fpstate_init(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; @@ -360,7 +359,7 @@ void fpu__restore(void) /* * does a slab alloc which can sleep */ - if (fpstate_alloc_init(tsk)) { + if (fpstate_alloc_init(fpu)) { /* * ran out of memory!
*/ @@ -396,7 +395,7 @@ void fpu__flush_thread(struct task_struct *tsk) } else { if (!fpu->fpstate_active) { /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(fpstate_alloc_init(tsk))) + if (WARN_ON(fpstate_alloc_init(fpu))) force_sig(SIGKILL, tsk); user_fpu_begin(); } diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 3953cbf8d7e7..80b0c8fa50c5 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -350,7 +350,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!fpu->fpstate_active && fpstate_alloc_init(tsk)) + if (!fpu->fpstate_active && fpstate_alloc_init(fpu)) return -1; if (!static_cpu_has(X86_FEATURE_FPU)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bab8afb61dc1..479d4ce25081 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6601,7 +6601,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - if (!fpu->fpstate_active && fpstate_alloc_init(current)) + if (!fpu->fpstate_active && fpstate_alloc_init(fpu)) return -ENOMEM; if (vcpu->sigset_active) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index f1aac55d6a67..e394bcb4275d 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -150,7 +150,7 @@ void math_emulate(struct math_emu_info *info) struct fpu *fpu = &current->thread.fpu; if (!fpu->fpstate_active) { - if (fpstate_alloc_init(current)) { + if (fpstate_alloc_init(fpu)) { do_group_exit(SIGKILL); return; } -- cgit v1.2.3-59-g8ed1b From 2e8a3102662233dfac92fe70f56429b4050f674a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:28:23 +0200 Subject: x86/fpu: Rename fpu__flush_thread() to fpu__clear() The primary purpose of this function is to clear the current task's FPU before an exec(), to not leak information from the previous task, and to allow the new task to start
with freshly initialized FPU registers. Rename the function to reflect this primary purpose. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/i387.h | 2 +- arch/x86/kernel/fpu/core.c | 4 ++-- arch/x86/kernel/process.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 38376cdf297c..b8f7d76ac066 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -20,7 +20,7 @@ struct user_i387_struct; extern int fpstate_alloc_init(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); -extern void fpu__flush_thread(struct task_struct *tsk); +extern void fpu__clear(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void fpu__restore(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e3e8585284ad..c15d064ce43e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -381,11 +381,11 @@ void fpu__restore(void) } EXPORT_SYMBOL_GPL(fpu__restore); -void fpu__flush_thread(struct task_struct *tsk) +void fpu__clear(struct task_struct *tsk) { struct fpu *fpu = &tsk->thread.fpu; - WARN_ON(tsk != current); + WARN_ON_ONCE(tsk != current); /* Almost certainly an anomaly */ if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. 
*/ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e97266b18ad3..51ad3422e728 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -130,7 +130,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - fpu__flush_thread(tsk); + fpu__clear(tsk); } static void hard_disable_TSC(void) -- cgit v1.2.3-59-g8ed1b From df6b35f409af0a8ff1ef62f552b8402f3fef8665 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:46:00 +0200 Subject: x86/fpu: Rename i387.h to fpu/api.h We already have fpu/types.h, move i387.h to fpu/api.h. The file name has become a misnomer anyway: it offers generic FPU APIs, but is not limited to i387 functionality. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/crypto/aesni-intel_glue.c | 2 +- arch/x86/crypto/crc32-pclmul_glue.c | 2 +- arch/x86/crypto/crct10dif-pclmul_glue.c | 2 +- arch/x86/crypto/fpu.c | 2 +- arch/x86/crypto/ghash-clmulni-intel_glue.c | 2 +- arch/x86/crypto/sha1_ssse3_glue.c | 2 +- arch/x86/crypto/sha256_ssse3_glue.c | 2 +- arch/x86/crypto/sha512_ssse3_glue.c | 2 +- arch/x86/crypto/twofish_avx_glue.c | 2 +- arch/x86/include/asm/crypto/glue_helper.h | 2 +- arch/x86/include/asm/efi.h | 2 +- arch/x86/include/asm/fpu-internal.h | 2 +- arch/x86/include/asm/fpu/api.h | 107 +++++++++++++++++++++++++++++ arch/x86/include/asm/i387.h | 107 ----------------------------- arch/x86/include/asm/simd.h | 2 +- arch/x86/include/asm/suspend_32.h | 2 +- arch/x86/include/asm/suspend_64.h | 2 +- arch/x86/include/asm/xor.h | 2 +- arch/x86/include/asm/xor_32.h | 2 +- arch/x86/include/asm/xor_avx.h | 2 +- arch/x86/kernel/cpu/bugs.c | 2 +- arch/x86/kernel/fpu/xsave.c | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/lguest/boot.c | 2 +- arch/x86/lib/mmx_32.c | 2 +- 
arch/x86/math-emu/fpu_entry.c | 2 +- drivers/char/hw_random/via-rng.c | 2 +- drivers/crypto/padlock-aes.c | 2 +- drivers/crypto/padlock-sha.c | 2 +- drivers/lguest/x86/core.c | 2 +- lib/raid6/x86.h | 2 +- 31 files changed, 136 insertions(+), 136 deletions(-) create mode 100644 arch/x86/include/asm/fpu/api.h delete mode 100644 arch/x86/include/asm/i387.h (limited to 'arch/x86/include/asm/i387.h') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 112cefacf2af..b419f43ce0c5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -32,7 +32,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include #include diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 1937fc1d8763..07d2c6c86a54 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -35,7 +35,7 @@ #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index b6c67bf30fdf..a3fcfc97a311 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -29,7 +29,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index f368ba261739..5a2f30f9f52d 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -18,7 +18,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> struct crypto_fpu_ctx { struct crypto_blkcipher *child; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 2079baf06bdd..64d7cf1b50e1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,7 +19,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #define GHASH_BLOCK_SIZE 16 diff --git a/arch/x86/crypto/sha1_ssse3_glue.c
b/arch/x86/crypto/sha1_ssse3_glue.c index 33d1b9dc14cc..cb3bf19dca5a 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -29,7 +29,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index ccc338881ee8..9eaf7abaf4dc 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include #include diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index d9fa4c1e063f..e0d6a67f567d 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -35,7 +35,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index b5e2d5651851..1a66e6110f4b 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -36,7 +36,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include #include diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 1eef55596e82..03bb1065c335 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -7,7 +7,7 @@ #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3738b138b843..155162ea0e00 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_EFI_H #define _ASM_X86_EFI_H -#include <asm/i387.h> +#include <asm/fpu/api.h> #include /* diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 21ad68179454..d68b349b4247 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -15,7 +15,7
@@ #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h new file mode 100644 index 000000000000..9d3a6f3cfc1b --- /dev/null +++ b/arch/x86/include/asm/fpu/api.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_API_H +#define _ASM_X86_FPU_API_H + +#ifndef __ASSEMBLY__ + +#include +#include + +struct pt_regs; +struct user_i387_struct; + +extern int fpstate_alloc_init(struct fpu *fpu); +extern void fpstate_init(struct fpu *fpu); +extern void fpu__clear(struct task_struct *tsk); + +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); +extern void fpu__restore(void); +extern void fpu__init_check_bugs(void); + +extern bool irq_fpu_usable(void); + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. + */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); + +static inline void kernel_fpu_begin(void) +{ + preempt_disable(); + WARN_ON_ONCE(!irq_fpu_usable()); + __kernel_fpu_begin(); +} + +static inline void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} + +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context as well.
To prevent these kernel instructions + * in interrupt context interacting wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +static inline int irq_ts_save(void) +{ + /* + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() + */ + if (!in_atomic()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} + +static inline void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} + +/* + * The question "does this thread have fpu access?" + * is slightly racy, since preemption could come in + * and revoke it immediately after the test. + * + * However, even in that very unlikely scenario, + * we can just assume we have FPU access - typically + * to save the FP state - we'll just take a #NM + * fault and get the FPU access back. + */ +static inline int user_has_fpu(void) +{ + return current->thread.fpu.has_fpu; +} + +extern void fpu__save(struct fpu *fpu); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h deleted file mode 100644 index b8f7d76ac066..000000000000 --- a/arch/x86/include/asm/i387.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_I387_H -#define _ASM_X86_I387_H - -#ifndef __ASSEMBLY__ - -#include -#include - -struct pt_regs; -struct user_i387_struct; - -extern int fpstate_alloc_init(struct fpu *fpu); -extern void fpstate_init(struct fpu *fpu); -extern void fpu__clear(struct task_struct *tsk); - -extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern void fpu__restore(void); -extern void fpu__init_check_bugs(void); - 
-extern bool irq_fpu_usable(void); - -/* - * Careful: __kernel_fpu_begin/end() must be called with preempt disabled - * and they don't touch the preempt state on their own. - * If you enable preemption after __kernel_fpu_begin(), preempt notifier - * should call the __kernel_fpu_end() to prevent the kernel/user FPU - * state from getting corrupted. KVM for example uses this model. - * - * All other cases use kernel_fpu_begin/end() which disable preemption - * during kernel FPU usage. - */ -extern void __kernel_fpu_begin(void); -extern void __kernel_fpu_end(void); - -static inline void kernel_fpu_begin(void) -{ - preempt_disable(); - WARN_ON_ONCE(!irq_fpu_usable()); - __kernel_fpu_begin(); -} - -static inline void kernel_fpu_end(void) -{ - __kernel_fpu_end(); - preempt_enable(); -} - -/* - * Some instructions like VIA's padlock instructions generate a spurious - * DNA fault but don't modify SSE registers. And these instructions - * get used from interrupt context as well. To prevent these kernel instructions - * in interrupt context interacting wrongly with other user/kernel fpu usage, we - * should use them only in the context of irq_ts_save/restore() - */ -static inline int irq_ts_save(void) -{ - /* - * If in process context and not atomic, we can take a spurious DNA fault. - * Otherwise, doing clts() in process context requires disabling preemption - * or some heavy lifting like kernel_fpu_begin() - */ - if (!in_atomic()) - return 0; - - if (read_cr0() & X86_CR0_TS) { - clts(); - return 1; - } - - return 0; -} - -static inline void irq_ts_restore(int TS_state) -{ - if (TS_state) - stts(); -} - -/* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. 
- */ -static inline int user_has_fpu(void) -{ - return current->thread.fpu.has_fpu; -} - -extern void fpu__save(struct fpu *fpu); - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_I387_H */ diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h index ee80b92f0096..6c8a7ed13365 100644 --- a/arch/x86/include/asm/simd.h +++ b/arch/x86/include/asm/simd.h @@ -1,5 +1,5 @@ -#include <asm/i387.h> +#include <asm/fpu/api.h> /* * may_use_simd - whether it is allowable at this time to issue SIMD diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 552d6c90a6d4..d1793f06854d 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_32_H #include -#include <asm/i387.h> +#include <asm/fpu/api.h> /* image of the saved processor state */ struct saved_context { diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index bc6232834bab..7ebf0ebe4e68 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_64_H #include -#include <asm/i387.h> +#include <asm/fpu/api.h> /* * Image of the saved processor state, used by the low level ACPI suspend to diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index d8829751b3f8..1f5c5161ead6 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -36,7 +36,7 @@ * no advantages to be gotten from x86-64 here anyways.
*/ -#include <asm/i387.h> +#include <asm/fpu/api.h> #ifdef CONFIG_X86_32 /* reduce register pressure */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index ce05722e3c68..5a08bc8bff33 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -26,7 +26,7 @@ #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" -#include <asm/i387.h> +#include <asm/fpu/api.h> static void xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 492b29802f57..7c0a517ec751 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -18,7 +18,7 @@ #ifdef CONFIG_AS_AVX #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #define BLOCK4(i) \ BLOCK(32 * i, 0) \ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index eb8be0c5823b..29dd74318ec6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,7 +12,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include #include diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 80b0c8fa50c5..8aa3b864a2e0 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -9,7 +9,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include #include diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b61687bd79..5cb738a18ca3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -40,7 +40,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include #include diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..27f8eea0d6eb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -70,7 +70,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include /* for struct machine_ops */ #include diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index c9f2d9ba8dd8..e5e3ed8dc079 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@
-22,7 +22,7 @@ #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include void *_mmx_memcpy(void *to, const void *from, size_t len) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index e394bcb4275d..3bb4c6a24ea5 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -31,7 +31,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include "fpu_system.h" #include "fpu_emu.h" diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index a3bebef255ad..0c98a9d51a24 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -33,7 +33,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index c178ed8c3908..da2d6777bd09 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -22,7 +22,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> /* * Number of data blocks actually fetched for each xcrypt insn. diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 95f7d27ce491..4e154c9b9206 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -23,7 +23,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> struct padlock_sha_desc { struct shash_desc fallback; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index bcb534a5512d..fce5989e66d9 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -46,7 +46,7 @@ #include #include #include -#include <asm/i387.h> +#include <asm/fpu/api.h> #include #include "../lg.h" diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index b7595484a815..8fe9d9662abb 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -23,7 +23,7 @@ #ifdef __KERNEL__ /* Real code */ -#include <asm/i387.h> +#include <asm/fpu/api.h> #else /* Dummy code for user space testing */ -- cgit v1.2.3-59-g8ed1b