| author | 2012-02-28 12:42:14 +0000 |
| committer | 2012-02-28 12:42:21 +0000 |
| commit | fb0b82b32ce17564bc64cede50bf4a3204eecc60 (patch) |
| tree | 00b5e466074c6fb373d64c493b3341186024acc7 /arch/x86/include |
| parent | Merge branch 'kirkwood/board' into next/boards (diff) |
| parent | ARM: pxa: add dummy clock for pxa25x and pxa27x (diff) |
| download | wireguard-linux-fb0b82b32ce17564bc64cede50bf4a3204eecc60.tar.xz, wireguard-linux-fb0b82b32ce17564bc64cede50bf4a3204eecc60.zip |
Merge branch 'board-specific' of git://github.com/hzhuang1/linux into next/boards
* 'board-specific' of git://github.com/hzhuang1/linux: (5 commits)
  ARM: pxa: add dummy clock for pxa25x and pxa27x
  ARM: mmp: append irq name of gpio device
  pxa/hx4700: Fix PXA_GPIO_IRQ_BASE/IRQ_NUM values
  pxa/hx4700: Add ASIC3 LED support
  pxa/hx4700: Correct StrataFlash block size discovery
(update to v3.3-rc5)
Diffstat (limited to 'arch/x86/include')
| -rw-r--r-- | arch/x86/include/asm/i387.h | 307 |
| -rw-r--r-- | arch/x86/include/asm/processor.h | 2 |
| -rw-r--r-- | arch/x86/include/asm/thread_info.h | 2 |
3 files changed, 254 insertions, 57 deletions
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 6919e936345b..247904945d3f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,10 +29,11 @@ extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
-extern asmlinkage void math_state_restore(void);
-extern void __math_state_restore(void);
+extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
+DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
+
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
 				xstateregs_get;
@@ -212,19 +213,11 @@ static inline void fpu_fxsave(struct fpu *fpu)
 
 #endif	/* CONFIG_X86_64 */
 
-/* We need a safe address that is cheap to find and that is already
-   in L1 during context switch. The best choices are unfortunately
-   different for UP and SMP */
-#ifdef CONFIG_SMP
-#define safe_address (__per_cpu_offset[0])
-#else
-#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
-#endif
-
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -233,33 +226,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}
 
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
-
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending.  Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"	  	/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
+		return 0;
+	}
+	return 1;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
-	task_thread_info(tsk)->status &= ~TS_USEDFPU;
+	return fpu_save_init(&tsk->thread.fpu);
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -277,44 +270,212 @@ static inline int fpu_restore_checking(struct fpu *fpu)
 
 static inline int restore_fpu_checking(struct task_struct *tsk)
 {
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+	   values. "m" is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"	  	/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (tsk->thread.fpu.has_fpu));
+
 	return fpu_restore_checking(&tsk->thread.fpu);
 }
 
 /*
- * Signal frame handlers...
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+static inline int __thread_has_fpu(struct task_struct *tsk)
+{
+	return tsk->thread.fpu.has_fpu;
+}
 
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		__save_init_fpu(tsk);
-		stts();
-	} else
-		tsk->fpu_counter = 0;
+	tsk->thread.fpu.has_fpu = 0;
+	percpu_write(fpu_owner_task, NULL);
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
+{
+	tsk->thread.fpu.has_fpu = 1;
+	percpu_write(fpu_owner_task, tsk);
+}
+
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct task_struct *tsk)
+{
+	__thread_clear_has_fpu(tsk);
+	stts();
+}
+
+static inline void __thread_fpu_begin(struct task_struct *tsk)
+{
+	clts();
+	__thread_set_has_fpu(tsk);
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+	return new == percpu_read_stable(fpu_owner_task) &&
+		cpu == new->thread.fpu.last_cpu;
+}
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (!__save_init_fpu(old))
+			cpu = ~0;
+		old->thread.fpu.last_cpu = cpu;
+		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			new->fpu_counter++;
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		old->thread.fpu.last_cpu = ~0;
+		if (fpu.preload) {
+			new->fpu_counter++;
+			if (fpu_lazy_restore(new, cpu))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+	if (fpu.preload) {
+		if (unlikely(restore_fpu_checking(new)))
+			__thread_fpu_end(new);
+	}
 }
 
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+	if (__thread_has_fpu(tsk)) {
 		/* Ignore delayed exceptions from user space */
 		asm volatile("1: fwait\n"
 			     "2:\n"
 			     _ASM_EXTABLE(1b, 2b));
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
+		__thread_fpu_end(tsk);
 	}
 }
 
+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: the thread must not have fpu (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+	return !__thread_has_fpu(current) &&
+		(read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+	return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+static inline bool irq_fpu_usable(void)
+{
+	return !in_interrupt() ||
+		interrupted_user_mode() ||
+		interrupted_kernel_fpu_idle();
+}
+
 static inline void kernel_fpu_begin(void)
 {
-	struct thread_info *me = current_thread_info();
+	struct task_struct *me = current;
+
+	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
-	if (me->status & TS_USEDFPU)
-		__save_init_fpu(me->task);
-	else
+	if (__thread_has_fpu(me)) {
+		__save_init_fpu(me);
+		__thread_clear_has_fpu(me);
+		/* We do 'stts()' in kernel_fpu_end() */
+	} else {
+		percpu_write(fpu_owner_task, NULL);
 		clts();
+	}
 }
 
 static inline void kernel_fpu_end(void)
@@ -323,14 +484,6 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }
 
-static inline bool irq_fpu_usable(void)
-{
-	struct pt_regs *regs;
-
-	return !in_interrupt() || !(regs = get_irq_regs()) || \
-		user_mode(regs) || (read_cr0() & X86_CR0_TS);
-}
-
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
@@ -363,20 +516,64 @@ static inline void irq_ts_restore(int TS_state)
 }
 
 /*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ *
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe, though.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline int user_has_fpu(void)
+{
+	return __thread_has_fpu(current);
+}
+
+static inline void user_fpu_end(void)
+{
+	preempt_disable();
+	__thread_fpu_end(current);
+	preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+	preempt_disable();
+	if (!user_has_fpu())
+		__thread_fpu_begin(current);
+	preempt_enable();
+}
+
+/*
  * These disable preemption on their own and are safe
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
+	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 	preempt_disable();
 	__save_init_fpu(tsk);
-	stts();
+	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index aa9088c26931..58545c97d071 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -374,6 +374,8 @@ union thread_xstate {
 };
 
 struct fpu {
+	unsigned int last_cpu;
+	unsigned int has_fpu;
 	union thread_xstate *state;
 };
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index bc817cd8b443..cfd8144d5527 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -247,8 +247,6 @@ static inline struct thread_info *current_thread_info(void)
  * ever touches our thread-synchronous status, so we don't
  * have to worry about atomic accesses.
  */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* idle task polling need_resched,
 					   skip sending interrupt */
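The switch_fpu_prepare()/switch_fpu_finish() pair introduced above is only declared in i387.h; its callers live in arch/x86/kernel/process_32.c and process_64.c, which fall outside this diffstat (limited to arch/x86/include). As a rough illustration of the call pattern described by the comments in the patch, here is a minimal sketch; the function name and the elided task-switch steps are placeholders, not code from this commit:

```c
/*
 * Sketch only: how a context-switch path is expected to use the
 * two-stage FPU switch API from the patched <asm/i387.h>.  The
 * function below is hypothetical; everything except the
 * switch_fpu_prepare()/switch_fpu_finish() calls is elided.
 * Preemption is assumed to be disabled, as it is on the real
 * context-switch path.
 */
#include <linux/sched.h>
#include <linux/smp.h>
#include <asm/i387.h>

struct task_struct *example_switch_to(struct task_struct *prev_p,
				      struct task_struct *next_p)
{
	int cpu = smp_processor_id();
	fpu_switch_t fpu;

	/*
	 * Stage 1, still running as the old task: save its FPU state
	 * if it owns the FPU, decide whether the new task's state
	 * should be preloaded, and update CR0.TS accordingly.
	 */
	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/* ... stack, TLS and per-cpu state switching would happen here ... */

	/*
	 * Stage 2, now running as the new task: restore the FPU
	 * register state only if switch_fpu_prepare() chose to
	 * preload it; on a restore failure the FPU is turned off again.
	 */
	switch_fpu_finish(next_p, fpu);

	return prev_p;
}
```

When the state is not preloaded, CR0.TS stays set and the new task's first FPU instruction traps with #NM, where math_state_restore() reloads the state lazily; fpu_lazy_restore() lets even that reload be skipped when the task resumes on the CPU whose registers still hold its state, which is what the new last_cpu and fpu_owner_task tracking in this diff is for.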
