From 464d1a78fbf8cf6c7fd970e7b3e2db50a320ce28 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 13 Feb 2007 13:26:20 +0100 Subject: [PATCH] i386: Convert i386 PDA code to use %fs Convert the PDA code to use %fs rather than %gs as the segment for per-processor data. This is because some processors show a small but measurable performance gain for reloading a NULL segment selector (as %fs generally is in user-space) versus a non-NULL one (as %gs generally is). On modern processors the difference is very small, perhaps undetectable. Some old AMD "K6 3D+" processors are noticably slower when %fs is used rather than %gs; I have no idea why this might be, but I think they're sufficiently rare that it doesn't matter much. This patch also fixes the math emulator, which had not been adjusted to match the changed struct pt_regs. [frederik.deweerdt@gmail.com: fixit with gdb] [mingo@elte.hu: Fix KVM too] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Ian Campbell Acked-by: Ingo Molnar Acked-by: Zachary Amsden Cc: Eric Dumazet Signed-off-by: Frederik Deweerdt Signed-off-by: Andrew Morton --- arch/i386/kernel/asm-offsets.c | 2 +- arch/i386/kernel/cpu/common.c | 14 +++++++------- arch/i386/kernel/entry.S | 32 ++++++++++++++++---------------- arch/i386/kernel/head.S | 6 +++--- arch/i386/kernel/kprobes.c | 4 ++-- arch/i386/kernel/process.c | 24 +++++++++++------------- arch/i386/kernel/ptrace.c | 16 ++++++++-------- arch/i386/kernel/signal.c | 10 +++++----- arch/i386/kernel/traps.c | 7 ++++--- arch/i386/kernel/vm86.c | 33 +++++++++++++++++---------------- arch/i386/math-emu/get_address.c | 14 +++++--------- 11 files changed, 79 insertions(+), 83 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 1b2f3cd33270..c37535163bfc 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c @@ -72,7 +72,7 @@ void foo(void) OFFSET(PT_EAX, pt_regs, eax); OFFSET(PT_DS, pt_regs, xds); OFFSET(PT_ES, pt_regs, xes); - OFFSET(PT_GS, pt_regs, xgs); + OFFSET(PT_FS, pt_regs, xfs); OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); OFFSET(PT_EIP, pt_regs, eip); OFFSET(PT_CS, pt_regs, xcs); diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 8a8bbdaaf38a..dcbbd0a8bfc2 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -605,7 +605,7 @@ void __init early_cpu_init(void) struct pt_regs * __devinit idle_regs(struct pt_regs *regs) { memset(regs, 0, sizeof(struct pt_regs)); - regs->xgs = __KERNEL_PDA; + regs->xfs = __KERNEL_PDA; return regs; } @@ -662,12 +662,12 @@ struct i386_pda boot_pda = { .pcurrent = &init_task, }; -static inline void set_kernel_gs(void) +static inline void set_kernel_fs(void) { - /* Set %gs for this CPU's PDA. Memory clobber is to create a + /* Set %fs for this CPU's PDA. Memory clobber is to create a barrier with respect to any PDA operations, so the compiler doesn't move any before here. */ - asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); + asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); } /* Initialize the CPU's GDT and PDA. The boot CPU does this for @@ -718,7 +718,7 @@ void __cpuinit cpu_set_gdt(int cpu) the boot CPU, this will transition from the boot gdt+pda to the real ones). */ load_gdt(cpu_gdt_descr); - set_kernel_gs(); + set_kernel_fs(); } /* Common CPU init for both boot and secondary CPUs */ @@ -764,8 +764,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); #endif - /* Clear %fs. */ - asm volatile ("mov %0, %%fs" : : "r" (0)); + /* Clear %gs. */ + asm volatile ("mov %0, %%gs" : : "r" (0)); /* Clear all 6 debug registers: */ set_debugreg(0, 0); diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5e47683fc63a..8c6a22a42d2e 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -30,7 +30,7 @@ * 18(%esp) - %eax * 1C(%esp) - %ds * 20(%esp) - %es - * 24(%esp) - %gs + * 24(%esp) - %fs * 28(%esp) - orig_eax * 2C(%esp) - %eip * 30(%esp) - %cs @@ -99,9 +99,9 @@ VM_MASK = 0x00020000 #define SAVE_ALL \ cld; \ - pushl %gs; \ + pushl %fs; \ CFI_ADJUST_CFA_OFFSET 4;\ - /*CFI_REL_OFFSET gs, 0;*/\ + /*CFI_REL_OFFSET fs, 0;*/\ pushl %es; \ CFI_ADJUST_CFA_OFFSET 4;\ /*CFI_REL_OFFSET es, 0;*/\ @@ -133,7 +133,7 @@ VM_MASK = 0x00020000 movl %edx, %ds; \ movl %edx, %es; \ movl $(__KERNEL_PDA), %edx; \ - movl %edx, %gs + movl %edx, %fs #define RESTORE_INT_REGS \ popl %ebx; \ @@ -166,9 +166,9 @@ VM_MASK = 0x00020000 2: popl %es; \ CFI_ADJUST_CFA_OFFSET -4;\ /*CFI_RESTORE es;*/\ -3: popl %gs; \ +3: popl %fs; \ CFI_ADJUST_CFA_OFFSET -4;\ - /*CFI_RESTORE gs;*/\ + /*CFI_RESTORE fs;*/\ .pushsection .fixup,"ax"; \ 4: movl $0,(%esp); \ jmp 1b; \ @@ -349,11 +349,11 @@ sysenter_past_esp: movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON -1: mov PT_GS(%esp), %gs +1: mov PT_FS(%esp), %fs ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC .pushsection .fixup,"ax" -2: movl $0,PT_GS(%esp) +2: movl $0,PT_FS(%esp) jmp 1b .section __ex_table,"a" .align 4 @@ -550,7 +550,7 @@ syscall_badsys: #define FIXUP_ESPFIX_STACK \ /* since we are on a wrong stack, we cant make it a C code :( */ \ - movl %gs:PDA_cpu, %ebx; \ + movl %fs:PDA_cpu, %ebx; \ PER_CPU(cpu_gdt_descr, %ebx); \ movl GDS_address(%ebx), %ebx; \ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ @@ -632,7 +632,7 @@ KPROBE_ENTRY(page_fault) CFI_ADJUST_CFA_OFFSET 4 ALIGN error_code: - /* the function address is in %gs's slot on the stack */ + /* the function address is in %fs's slot on the stack */ pushl %es CFI_ADJUST_CFA_OFFSET 4 /*CFI_REL_OFFSET es, 0*/ @@ -661,20 +661,20 @@ error_code: CFI_ADJUST_CFA_OFFSET 4 CFI_REL_OFFSET ebx, 0 cld - pushl %gs + pushl %fs CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET gs, 0*/ + /*CFI_REL_OFFSET fs, 0*/ movl $(__KERNEL_PDA), %ecx - movl %ecx, %gs + movl %ecx, %fs UNWIND_ESPFIX_STACK popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ - movl PT_GS(%esp), %edi # get the function address + movl PT_FS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_GS(%esp) - /*CFI_REL_OFFSET gs, ES*/ + mov %ecx, PT_FS(%esp) + /*CFI_REL_OFFSET fs, ES*/ movl $(__USER_DS), %ecx movl %ecx, %ds movl %ecx, %es diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index cb9abdfced9b..15336c8b5960 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -319,12 +319,12 @@ is386: movl $2,%ecx # set MP movl %eax,%ds movl %eax,%es - xorl %eax,%eax # Clear FS and LDT - movl %eax,%fs + xorl %eax,%eax # Clear GS and LDT + movl %eax,%gs lldt %ax movl $(__KERNEL_PDA),%eax - mov %eax,%gs + mov %eax,%fs cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index af1d53344993..b85cfa3ce1dd 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -363,7 +363,7 @@ no_kprobe: " pushf\n" /* skip cs, eip, orig_eax */ " subl $12, %esp\n" - " pushl %gs\n" + " pushl %fs\n" " pushl %ds\n" " pushl %es\n" " pushl %eax\n" @@ -387,7 +387,7 @@ no_kprobe: " popl %edi\n" " popl %ebp\n" " popl %eax\n" - /* skip eip, orig_eax, es, ds, gs */ + /* skip eip, orig_eax, es, ds, fs */ " addl $20, %esp\n" " popf\n" " ret\n"); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c641056233a6..23ae198dbbc3 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -308,8 +308,8 @@ void show_regs(struct pt_regs * regs) regs->eax,regs->ebx,regs->ecx,regs->edx); printk("ESI: %08lx EDI: %08lx EBP: %08lx", regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x GS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); + printk(" DS: %04x ES: %04x FS: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); cr0 = read_cr0(); cr2 = read_cr2(); @@ -340,7 +340,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xds = __USER_DS; regs.xes = __USER_DS; - regs.xgs = __KERNEL_PDA; + regs.xfs = __KERNEL_PDA; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -425,7 +425,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, p->thread.eip = (unsigned long) ret_from_fork; - savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -501,8 +501,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->regs.eax = regs->eax; dump->regs.ds = regs->xds; dump->regs.es = regs->xes; - savesegment(fs,dump->regs.fs); - dump->regs.gs = regs->xgs; + dump->regs.fs = regs->xfs; + savesegment(gs,dump->regs.gs); dump->regs.orig_eax = regs->orig_eax; dump->regs.eip = regs->eip; dump->regs.cs = regs->xcs; @@ -653,7 +653,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas load_esp0(tss, next); /* - * Save away %fs. No need to save %gs, as it was saved on the + * Save away %gs. No need to save %fs, as it was saved on the * stack on entry. No need to save %es and %ds, as those are * always kernel segments while inside the kernel. Doing this * before setting the new TLS descriptors avoids the situation @@ -662,7 +662,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - savesegment(fs, prev->fs); + savesegment(gs, prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -670,12 +670,10 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas load_TLS(next, cpu); /* - * Restore %fs if needed. - * - * Glibc normally makes %fs be zero. + * Restore %gs if needed (which is common) */ - if (unlikely(prev->fs | next->fs)) - loadsegment(fs, next->fs); + if (prev->gs | next->gs) + loadsegment(gs, next->gs); write_pda(pcurrent, next_p); diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index af8aabe85800..4a8f8a259723 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -89,14 +89,14 @@ static int putreg(struct task_struct *child, unsigned long regno, unsigned long value) { switch (regno >> 2) { - case FS: + case GS: if (value && (value & 3) != 3) return -EIO; - child->thread.fs = value; + child->thread.gs = value; return 0; case DS: case ES: - case GS: + case FS: if (value && (value & 3) != 3) return -EIO; value &= 0xffff; @@ -112,7 +112,7 @@ static int putreg(struct task_struct *child, value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; break; } - if (regno > ES*4) + if (regno > FS*4) regno -= 1*4; put_stack_long(child, regno, value); return 0; @@ -124,18 +124,18 @@ static unsigned long getreg(struct task_struct *child, unsigned long retval = ~0UL; switch (regno >> 2) { - case FS: - retval = child->thread.fs; + case GS: + retval = child->thread.gs; break; case DS: case ES: - case GS: + case FS: case SS: case CS: retval = 0xffff; /* fall through */ default: - if (regno > ES*4) + if (regno > FS*4) regno -= 1*4; retval &= get_stack_long(child, regno); } diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 65d7620eaa09..8f4afcc7d2ab 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -128,8 +128,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) - COPY_SEG(gs); - GET_SEG(fs); + GET_SEG(gs); + COPY_SEG(fs); COPY_SEG(es); COPY_SEG(ds); COPY(edi); @@ -244,9 +244,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, { int tmp, err = 0; - err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); - savesegment(fs, tmp); - err |= __put_user(tmp, (unsigned int __user *)&sc->fs); + err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs); + savesegment(gs, tmp); + err |= __put_user(tmp, (unsigned int __user *)&sc->gs); err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 0efad8aeb41a..4ec21037a361 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -291,10 +291,11 @@ void show_registers(struct pt_regs *regs) int i; int in_kernel = 1; unsigned long esp; - unsigned short ss; + unsigned short ss, gs; esp = (unsigned long) (®s->esp); savesegment(ss, ss); + savesegment(gs, gs); if (user_mode_vm(regs)) { in_kernel = 0; esp = regs->esp; @@ -313,8 +314,8 @@ void show_registers(struct pt_regs *regs) regs->eax, regs->ebx, regs->ecx, regs->edx); printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, ss); + printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", TASK_COMM_LEN, current->comm, current->pid, current_thread_info(), current, current->thread_info); diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index be2f96e67f78..d1b8f2b7aea6 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -96,12 +96,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user, { int ret = 0; - /* kernel_vm86_regs is missing xfs, so copy everything up to - (but not including) xgs, and then rest after xgs. */ - ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); - ret += copy_to_user(&user->__null_gs, ®s->pt.xgs, + /* kernel_vm86_regs is missing xgs, so copy everything up to + (but not including) orig_eax, and then rest including orig_eax. */ + ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax)); + ret += copy_to_user(&user->orig_eax, ®s->pt.orig_eax, sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.xgs)); + offsetof(struct kernel_vm86_regs, pt.orig_eax)); return ret; } @@ -113,12 +113,13 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, { int ret = 0; - ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); - ret += copy_from_user(®s->pt.xgs, &user->__null_gs, + /* copy eax-xfs inclusive */ + ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax)); + /* copy orig_eax-__gsh+extra */ + ret += copy_from_user(®s->pt.orig_eax, &user->orig_eax, sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.xgs) + + offsetof(struct kernel_vm86_regs, pt.orig_eax) + extra); - return ret; } @@ -157,8 +158,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) ret = KVM86->regs32; - loadsegment(fs, current->thread.saved_fs); - ret->xgs = current->thread.saved_gs; + ret->xfs = current->thread.saved_fs; + loadsegment(gs, current->thread.saved_gs); return ret; } @@ -285,9 +286,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk */ info->regs.pt.xds = 0; info->regs.pt.xes = 0; - info->regs.pt.xgs = 0; + info->regs.pt.xfs = 0; -/* we are clearing fs later just before "jmp resume_userspace", +/* we are clearing gs later just before "jmp resume_userspace", * because it is not saved/restored. */ @@ -321,8 +322,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk */ info->regs32->eax = 0; tsk->thread.saved_esp0 = tsk->thread.esp0; - savesegment(fs, tsk->thread.saved_fs); - tsk->thread.saved_gs = info->regs32->xgs; + tsk->thread.saved_fs = info->regs32->xfs; + savesegment(gs, tsk->thread.saved_gs); tss = &per_cpu(init_tss, get_cpu()); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; @@ -342,7 +343,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk __asm__ __volatile__( "movl %0,%%esp\n\t" "movl %1,%%ebp\n\t" - "mov %2, %%fs\n\t" + "mov %2, %%gs\n\t" "jmp resume_userspace" : /* no outputs */ :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c index 9819b705efa4..2e2c51a8bd3a 100644 --- a/arch/i386/math-emu/get_address.c +++ b/arch/i386/math-emu/get_address.c @@ -56,15 +56,14 @@ static int reg_offset_vm86[] = { #define VM86_REG_(x) (*(unsigned short *) \ (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) -/* These are dummy, fs and gs are not saved on the stack. */ -#define ___FS ___ds +/* This dummy, gs is not saved on the stack. */ #define ___GS ___ds static int reg_offset_pm[] = { offsetof(struct info,___cs), offsetof(struct info,___ds), offsetof(struct info,___es), - offsetof(struct info,___FS), + offsetof(struct info,___fs), offsetof(struct info,___GS), offsetof(struct info,___ss), offsetof(struct info,___ds) @@ -169,13 +168,10 @@ static long pm_address(u_char FPU_modrm, u_char segment, switch ( segment ) { - /* fs and gs aren't used by the kernel, so they still have their - user-space values. */ - case PREFIX_FS_-1: - /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ - savesegment(fs, addr->selector); - break; + /* gs isn't used by the kernel, so it still has its + user-space value. */ case PREFIX_GS_-1: + /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ savesegment(gs, addr->selector); break; default: -- cgit v1.2.3-59-g8ed1b