From 05e28f9de65a38bb0c769080e91b6976e7e1e70c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jan 2006 18:30:13 -0800 Subject: [SPARC64]: No need to D-cache color page tables any longer. Unlike the virtual page tables, the new TSB scheme does not require this ugly hack. Signed-off-by: David S. Miller --- include/asm-sparc64/cpudata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 74de79dca915..45a9a2cfaf79 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -20,8 +20,9 @@ typedef struct { /* Dcache line 2 */ unsigned int pgcache_size; unsigned int __pad1; - unsigned long *pte_cache[2]; + unsigned long *pte_cache; unsigned long *pgd_cache; + unsigned long __pad2; /* Dcache line 3, rarely used */ unsigned int dcache_size; @@ -30,8 +31,8 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - unsigned int __pad2; unsigned int __pad3; + unsigned int __pad4; } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); -- cgit v1.2.3-59-g8ed1b From 3c936465249f863f322154ff1aaa628b84ee5750 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jan 2006 18:30:27 -0800 Subject: [SPARC64]: Kill pgtable quicklists and use SLAB. Taking a nod from the powerpc port. With the per-cpu caching of both the page allocator and SLAB, the pgtable quicklist scheme becomes relatively silly and primitive. Signed-off-by: David S. Miller --- arch/sparc64/kernel/sparc64_ksyms.c | 4 - arch/sparc64/mm/init.c | 32 ++++---- include/asm-sparc64/cpudata.h | 9 +- include/asm-sparc64/pgalloc.h | 158 ++++++------------------------------ include/asm-sparc64/pgtable.h | 7 +- 5 files changed, 44 insertions(+), 166 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 3c06bfb92a8c..f1f01378d079 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -241,10 +241,6 @@ EXPORT_SYMBOL(verify_compat_iovec); #endif EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL(pte_alloc_one_kernel); -#ifndef CONFIG_SMP -EXPORT_SYMBOL(pgt_quicklists); -#endif EXPORT_SYMBOL(put_fs_struct); /* math-emu wants this */ diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 936ae1a594ac..7c456afaa9a5 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -141,26 +141,25 @@ unsigned long sparc64_kern_sec_context __read_mostly; int bigkernel = 0; -/* XXX Tune this... */ -#define PGT_CACHE_LOW 25 -#define PGT_CACHE_HIGH 50 +kmem_cache_t *pgtable_cache __read_mostly; -#ifndef CONFIG_SMP -struct pgtable_cache_struct pgt_quicklists; -#endif +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +{ + clear_page(addr); +} -void check_pgt_cache(void) +void pgtable_cache_init(void) { - preempt_disable(); - if (pgtable_cache_size > PGT_CACHE_HIGH) { - do { - if (pgd_quicklist) - free_pgd_slow(get_pgd_fast()); - if (pte_quicklist) - free_pte_slow(pte_alloc_one_fast()); - } while (pgtable_cache_size > PGT_CACHE_LOW); + pgtable_cache = kmem_cache_create("pgtable_cache", + PAGE_SIZE, PAGE_SIZE, + SLAB_HWCACHE_ALIGN | + SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (!pgtable_cache) { + prom_printf("pgtable_cache_init(): Could not create!\n"); + prom_halt(); } - preempt_enable(); } #ifdef CONFIG_DEBUG_DCFLUSH @@ -340,7 +339,6 @@ void show_mem(void) nr_swap_pages << (PAGE_SHIFT-10)); printk("%ld pages of RAM\n", num_physpages); printk("%d free pages\n", nr_free_pages()); - printk("%d pages in page table cache\n",pgtable_cache_size); } void mmu_info(struct seq_file *m) diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 45a9a2cfaf79..f7c0faede8b8 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -17,14 +17,7 @@ typedef struct { unsigned long clock_tick; /* %tick's per second */ unsigned long udelay_val; - /* Dcache line 2 */ - unsigned int pgcache_size; - unsigned int __pad1; - unsigned long *pte_cache; - unsigned long *pgd_cache; - unsigned long __pad2; - - /* Dcache line 3, rarely used */ + /* Dcache line 2, rarely used */ unsigned int dcache_size; unsigned int dcache_line_size; unsigned int icache_size; diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index ecea1bbdc115..12e4a273bd43 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -13,164 +14,59 @@ #include /* Page table allocation/freeing. */ -#ifdef CONFIG_SMP -/* Sliiiicck */ -#define pgt_quicklists local_cpu_data() -#else -extern struct pgtable_cache_struct { - unsigned long *pgd_cache; - unsigned long *pte_cache; - unsigned int pgcache_size; -} pgt_quicklists; -#endif -#define pgd_quicklist (pgt_quicklists.pgd_cache) -#define pte_quicklist (pgt_quicklists.pte_cache) -#define pgtable_cache_size (pgt_quicklists.pgcache_size) +extern kmem_cache_t *pgtable_cache; -static inline void free_pgd_fast(pgd_t *pgd) +static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - preempt_disable(); - *(unsigned long *)pgd = (unsigned long) pgd_quicklist; - pgd_quicklist = (unsigned long *) pgd; - pgtable_cache_size++; - preempt_enable(); + return kmem_cache_alloc(pgtable_cache, GFP_KERNEL); } -static inline pgd_t *get_pgd_fast(void) +static inline void pgd_free(pgd_t *pgd) { - unsigned long *ret; - - preempt_disable(); - if((ret = pgd_quicklist) != NULL) { - pgd_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - preempt_enable(); - } else { - preempt_enable(); - ret = (unsigned long *) __get_free_page(GFP_KERNEL|__GFP_REPEAT); - if(ret) - memset(ret, 0, PAGE_SIZE); - } - return (pgd_t *)ret; -} - -static inline void free_pgd_slow(pgd_t *pgd) -{ - free_page((unsigned long)pgd); + kmem_cache_free(pgtable_cache, pgd); } #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) -static inline pmd_t *pmd_alloc_one_fast(void) -{ - unsigned long *ret; - - preempt_disable(); - ret = (unsigned long *) pte_quicklist; - if (likely(ret)) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } - preempt_enable(); - - return (pmd_t *) ret; -} - -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) -{ - pmd_t *pmd; - - pmd = pmd_alloc_one_fast(); - if (unlikely(!pmd)) { - pmd = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); - if (pmd) - memset(pmd, 0, PAGE_SIZE); - } - return pmd; -} - -static inline void free_pmd_fast(pmd_t *pmd) -{ - preempt_disable(); - *(unsigned long *)pmd = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pmd; - pgtable_cache_size++; - preempt_enable(); -} - -static inline void free_pmd_slow(pmd_t *pmd) -{ - free_page((unsigned long)pmd); -} - -#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) -#define pmd_populate(MM,PMD,PTE_PAGE) \ - pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) - -static inline pte_t *pte_alloc_one_fast(void) +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - unsigned long *ret; - - preempt_disable(); - ret = (unsigned long *) pte_quicklist; - if (likely(ret)) { - pte_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } - preempt_enable(); - - return (pte_t *) ret; + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); } -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +static inline void pmd_free(pmd_t *pmd) { - pte_t *ptep = pte_alloc_one_fast(); - - if (likely(ptep)) - return ptep; - - return (pte_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + kmem_cache_free(pgtable_cache, pmd); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) { - pte_t *pte = pte_alloc_one_fast(); - - if (likely(pte)) - return virt_to_page(pte); - - return alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + return kmem_cache_alloc(pgtable_cache, + GFP_KERNEL|__GFP_REPEAT); } -static inline void free_pte_fast(pte_t *pte) +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) { - preempt_disable(); - *(unsigned long *)pte = (unsigned long) pte_quicklist; - pte_quicklist = (unsigned long *) pte; - pgtable_cache_size++; - preempt_enable(); + return virt_to_page(pte_alloc_one_kernel(mm, address)); } - -static inline void free_pte_slow(pte_t *pte) -{ - free_page((unsigned long) pte); -} - + static inline void pte_free_kernel(pte_t *pte) { - free_pte_fast(pte); + kmem_cache_free(pgtable_cache, pte); } static inline void pte_free(struct page *ptepage) { - free_pte_fast(page_address(ptepage)); + pte_free_kernel(page_address(ptepage)); } -#define pmd_free(pmd) free_pmd_fast(pmd) -#define pgd_free(pgd) free_pgd_fast(pgd) -#define pgd_alloc(mm) get_pgd_fast() + +#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE) +#define pmd_populate(MM,PMD,PTE_PAGE) \ + pmd_populate_kernel(MM,PMD,page_address(PTE_PAGE)) + +#define check_pgt_cache() do { } while (0) #endif /* _SPARC64_PGALLOC_H */ diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index f3ba1e058195..77ba0b6cc1ce 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -432,12 +432,7 @@ extern unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, unsigned long); #define HAVE_ARCH_FB_UNMAPPED_AREA -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - -extern void check_pgt_cache(void); +extern void pgtable_cache_init(void); #endif /* !(__ASSEMBLY__) */ -- cgit v1.2.3-59-g8ed1b From 56fb4df6da76c35dca22036174e2d1edef83ff1f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 26 Feb 2006 23:24:22 -0800 Subject: [SPARC64]: Elminate all usage of hard-coded trap globals. UltraSPARC has special sets of global registers which are switched to for certain trap types. There is one set for MMU related traps, one set of Interrupt Vector processing, and another set (called the Alternate globals) for all other trap types. For what seems like forever we've hard coded the values in some of these trap registers. Some examples include: 1) Interrupt Vector global %g6 holds current processors interrupt work struct where received interrupts are managed for IRQ handler dispatch. 2) MMU global %g7 holds the base of the page tables of the currently active address space. 3) Alternate global %g6 held the current_thread_info() value. Such hardcoding has resulted in some serious issues in many areas. There are some code sequences where having another register available would help clean up the implementation. Taking traps such as cross-calls from the OBP firmware requires some trick code sequences wherein we have to save away and restore all of the special sets of global registers when we enter/exit OBP. We were also using the IMMU TSB register on SMP to hold the per-cpu area base address, which doesn't work any longer now that we actually use the TSB facility of the cpu. The implementation is pretty straight forward. One tricky bit is getting the current processor ID as that is different on different cpu variants. We use a stub with a fancy calling convention which we patch at boot time. The calling convention is that the stub is branched to and the (PC - 4) to return to is in register %g1. The cpu number is left in %g6. This stub can be invoked by using the __GET_CPUID macro. We use an array of per-cpu trap state to store the current thread and physical address of the current address space's page tables. The TRAP_LOAD_THREAD_REG loads %g6 with the current thread from this table, it uses __GET_CPUID and also clobbers %g1. TRAP_LOAD_IRQ_WORK is used by the interrupt vector processing to load the current processor's IRQ software state into %g6. It also uses __GET_CPUID and clobbers %g1. Finally, TRAP_LOAD_PGD_PHYS loads the physical address base of the current address space's page tables into %g7, it clobbers %g1 and uses __GET_CPUID. Many refinements are possible, as well as some tuning, with this stuff in place. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 122 +++++++++++++++++++++++++++++++++------ arch/sparc64/kernel/etrap.S | 18 +++--- arch/sparc64/kernel/head.S | 20 +------ arch/sparc64/kernel/irq.c | 26 +-------- arch/sparc64/kernel/rtrap.S | 10 ++-- arch/sparc64/kernel/setup.c | 8 +++ arch/sparc64/kernel/smp.c | 55 ++++-------------- arch/sparc64/kernel/trampoline.S | 9 +-- arch/sparc64/kernel/traps.c | 19 ++++++ arch/sparc64/kernel/tsb.S | 26 +++++++-- arch/sparc64/kernel/ttable.S | 2 +- arch/sparc64/kernel/winfixup.S | 24 +++----- arch/sparc64/mm/ultra.S | 10 ++-- include/asm-sparc64/cpudata.h | 86 ++++++++++++++++++++++++++- include/asm-sparc64/system.h | 2 + include/asm-sparc64/ttable.h | 18 +++--- 16 files changed, 287 insertions(+), 168 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a73553ae7e53..906b64ffdb1b 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -50,7 +50,8 @@ do_fpdis: add %g0, %g0, %g0 ba,a,pt %xcc, rtrap_clr_l6 -1: ldub [%g6 + TI_FPSAVED], %g5 +1: TRAP_LOAD_THREAD_REG + ldub [%g6 + TI_FPSAVED], %g5 wr %g0, FPRS_FEF, %fprs andcc %g5, FPRS_FEF, %g0 be,a,pt %icc, 1f @@ -189,6 +190,7 @@ fp_other_bounce: .globl do_fpother_check_fitos .align 32 do_fpother_check_fitos: + TRAP_LOAD_THREAD_REG sethi %hi(fp_other_bounce - 4), %g7 or %g7, %lo(fp_other_bounce - 4), %g7 @@ -353,8 +355,6 @@ do_fptrap_after_fsr: * * With this method we can do most of the cross-call tlb/cache * flushing very quickly. - * - * Current CPU's IRQ worklist table is locked into %g6, don't touch. */ .text .align 32 @@ -378,6 +378,8 @@ do_ivec: sllx %g2, %g4, %g2 sllx %g4, 2, %g4 + TRAP_LOAD_IRQ_WORK + lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */ stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */ stw %g3, [%g6 + %g4] /* irq_work(cpu, pil) = bucket */ @@ -488,9 +490,24 @@ setcc: retl stx %o1, [%o0 + PT_V9_TSTATE] - .globl utrap, utrap_ill -utrap: brz,pn %g1, etrap + .globl utrap_trap +utrap_trap: /* %g3=handler,%g4=level */ + TRAP_LOAD_THREAD_REG + ldx [%g6 + TI_UTRAPS], %g1 + brnz,pt %g1, invoke_utrap nop + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + call bad_trap + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 + +invoke_utrap: + sllx %g3, 3, %g3 + ldx [%g1 + %g3], %g1 save %sp, -128, %sp rdpr %tstate, %l6 rdpr %cwp, %l7 @@ -500,17 +517,6 @@ utrap: brz,pn %g1, etrap rdpr %tnpc, %l7 wrpr %g1, 0, %tnpc done -utrap_ill: - call bad_trap - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 - - /* XXX Here is stuff we still need to write... -DaveM XXX */ - .globl netbsd_syscall -netbsd_syscall: - retl - nop /* We need to carefully read the error status, ACK * the errors, prevent recursive traps, and pass the @@ -1001,7 +1007,7 @@ dcpe_icpe_tl1_common: * %g3: scratch * %g4: AFSR * %g5: AFAR - * %g6: current thread ptr + * %g6: unused, will have current thread ptr after etrap * %g7: scratch */ __cheetah_log_error: @@ -1690,3 +1696,85 @@ __flushw_user: restore %g0, %g0, %g0 2: retl nop + + /* Read cpu ID from hardware, return in %g6. + * (callers_pc - 4) is in %g1. Patched at boot time. + * + * Default is spitfire implementation. + * + * The instruction sequence needs to be 5 instructions + * in order to fit the longest implementation, which is + * currently starfire. + */ + .align 32 + .globl __get_cpu_id +__get_cpu_id: + ldxa [%g0] ASI_UPA_CONFIG, %g6 + srlx %g6, 17, %g6 + jmpl %g1 + 0x4, %g0 + and %g6, 0x1f, %g6 + nop + +__get_cpu_id_cheetah_safari: + ldxa [%g0] ASI_SAFARI_CONFIG, %g6 + srlx %g6, 17, %g6 + jmpl %g1 + 0x4, %g0 + and %g6, 0x3ff, %g6 + nop + +__get_cpu_id_cheetah_jbus: + ldxa [%g0] ASI_JBUS_CONFIG, %g6 + srlx %g6, 17, %g6 + jmpl %g1 + 0x4, %g0 + and %g6, 0x1f, %g6 + nop + +__get_cpu_id_starfire: + sethi %hi(0x1fff40000d0 >> 9), %g6 + sllx %g6, 9, %g6 + or %g6, 0xd0, %g6 + jmpl %g1 + 0x4, %g0 + lduwa [%g6] ASI_PHYS_BYPASS_EC_E, %g6 + + .globl per_cpu_patch +per_cpu_patch: + sethi %hi(this_is_starfire), %o0 + lduw [%o0 + %lo(this_is_starfire)], %o1 + sethi %hi(__get_cpu_id_starfire), %o0 + brnz,pn %o1, 10f + or %o0, %lo(__get_cpu_id_starfire), %o0 + sethi %hi(tlb_type), %o0 + lduw [%o0 + %lo(tlb_type)], %o1 + brz,pt %o1, 11f + nop + rdpr %ver, %o0 + srlx %o0, 32, %o0 + sethi %hi(0x003e0016), %o1 + or %o1, %lo(0x003e0016), %o1 + cmp %o0, %o1 + sethi %hi(__get_cpu_id_cheetah_jbus), %o0 + be,pn %icc, 10f + or %o0, %lo(__get_cpu_id_cheetah_jbus), %o0 + sethi %hi(__get_cpu_id_cheetah_safari), %o0 + or %o0, %lo(__get_cpu_id_cheetah_safari), %o0 +10: + sethi %hi(__get_cpu_id), %o1 + or %o1, %lo(__get_cpu_id), %o1 + lduw [%o0 + 0x00], %o2 + stw %o2, [%o1 + 0x00] + flush %o1 + 0x00 + lduw [%o0 + 0x04], %o2 + stw %o2, [%o1 + 0x04] + flush %o1 + 0x04 + lduw [%o0 + 0x08], %o2 + stw %o2, [%o1 + 0x08] + flush %o1 + 0x08 + lduw [%o0 + 0x0c], %o2 + stw %o2, [%o1 + 0x0c] + flush %o1 + 0x0c + lduw [%o0 + 0x10], %o2 + stw %o2, [%o1 + 0x10] + flush %o1 + 0x10 +11: + retl + nop diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index 567dbb765c34..8b3b6d720ed5 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -31,6 +31,7 @@ .globl etrap, etrap_irq, etraptl1 etrap: rdpr %pil, %g2 etrap_irq: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -98,11 +99,7 @@ etrap_irq: stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g4, %g3) jmpl %l2 + 0x4, %g0 ldx [%g6 + TI_TASK], %g4 @@ -126,6 +123,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. * 0x58 TL4's TT * 0x60 TL */ + TRAP_LOAD_THREAD_REG sub %sp, ((4 * 8) * 4) + 8, %g2 rdpr %tl, %g1 @@ -179,7 +177,9 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. .align 64 .globl scetrap -scetrap: rdpr %pil, %g2 +scetrap: + TRAP_LOAD_THREAD_REG + rdpr %pil, %g2 rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -248,11 +248,7 @@ scetrap: rdpr %pil, %g2 stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] mov %l6, %g6 stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g4, %g3) ldx [%g6 + TI_TASK], %g4 done diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index d00e20693be1..82ce5bced9c7 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -26,6 +26,7 @@ #include #include #include +#include /* This section from from _start to sparc64_boot_end should fit into * 0x0000000000404000 to 0x0000000000408000. @@ -421,24 +422,6 @@ setup_trap_table: stxa %g2, [%g1] ASI_DMMU membar #Sync - /* The Linux trap handlers expect various trap global registers - * to be setup with some fixed values. So here we set these - * up very carefully. These globals are: - * - * Alternate Globals (PSTATE_AG): - * - * %g6 --> current_thread_info() - * - * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()): - * - * %g6 --> __irq_work[smp_processor_id()] - */ - - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate - mov %o2, %g6 - /* Kill PROM timer */ sethi %hi(0x80000000), %o2 sllx %o2, 32, %o2 @@ -457,7 +440,6 @@ setup_trap_table: 2: wrpr %g0, %g0, %wstate - wrpr %o1, 0x0, %pstate call init_irqwork_curcpu nop diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index f7490ef629b9..3e48af2769d4 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -848,33 +848,9 @@ static void kill_prom_timer(void) void init_irqwork_curcpu(void) { - register struct irq_work_struct *workp asm("o2"); - register unsigned long tmp asm("o3"); int cpu = hard_smp_processor_id(); - memset(__irq_work + cpu, 0, sizeof(*workp)); - - /* Make sure we are called with PSTATE_IE disabled. */ - __asm__ __volatile__("rdpr %%pstate, %0\n\t" - : "=r" (tmp)); - if (tmp & PSTATE_IE) { - prom_printf("BUG: init_irqwork_curcpu() called with " - "PSTATE_IE enabled, bailing.\n"); - __asm__ __volatile__("mov %%i7, %0\n\t" - : "=r" (tmp)); - prom_printf("BUG: Called from %lx\n", tmp); - prom_halt(); - } - - /* Set interrupt globals. */ - workp = &__irq_work[cpu]; - __asm__ __volatile__( - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate\n\t" - "mov %2, %%g6\n\t" - "wrpr %0, 0x0, %%pstate\n\t" - : "=&r" (tmp) - : "i" (PSTATE_IG), "r" (workp)); + memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct)); } /* Only invoked on boot processor. */ diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index 213eb4a9d8a4..5a62ec5d531c 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -223,12 +223,10 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3 ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4 ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5 -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g6 - brnz,a,pn %l3, 1f - ldxa [%g6] ASI_IMMU, %g5 -#endif + brz,pt %l3, 1f + nop + /* Must do this before thread reg is clobbered below. */ + LOAD_PER_CPU_BASE(%g6, %g7) 1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 158bd31e15b7..59a70301a6cf 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -507,6 +507,11 @@ void __init setup_arch(char **cmdline_p) /* Work out if we are starfire early on */ check_if_starfire(); + /* Now we know enough to patch the __get_cpu_id() + * trampoline used by trap code. + */ + per_cpu_patch(); + boot_flags_init(*cmdline_p); idprom_init(); @@ -545,6 +550,9 @@ void __init setup_arch(char **cmdline_p) smp_setup_cpu_possible_map(); paging_init(); + + /* Get boot processor trap_block[] setup. */ + init_cur_cpu_trap(); } static int __init set_preferred_console(void) diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index d2d3369e7b5d..8c245859d212 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -38,6 +38,7 @@ #include #include #include +#include extern void calibrate_delay(void); @@ -87,10 +88,6 @@ void __init smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); - cpu_data(id).pgcache_size = 0; - cpu_data(id).pte_cache[0] = NULL; - cpu_data(id).pte_cache[1] = NULL; - cpu_data(id).pgd_cache = NULL; cpu_data(id).idle_volume = 1; cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size", @@ -121,26 +118,15 @@ static volatile unsigned long callin_flag = 0; extern void inherit_locked_prom_mappings(int save_p); -static inline void cpu_setup_percpu_base(unsigned long cpu_id) -{ -#error IMMU TSB usage must be fixed - __asm__ __volatile__("mov %0, %%g5\n\t" - "stxa %0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (__per_cpu_offset(cpu_id)), - "r" (TSB_REG), "i" (ASI_IMMU)); -} - void __init smp_callin(void) { int cpuid = hard_smp_processor_id(); inherit_locked_prom_mappings(0); - __flush_tlb_all(); + __local_per_cpu_offset = __per_cpu_offset(cpuid); - cpu_setup_percpu_base(cpuid); + __flush_tlb_all(); smp_setup_percpu_timer(); @@ -1107,12 +1093,15 @@ void __init smp_setup_cpu_possible_map(void) void __devinit smp_prepare_boot_cpu(void) { - if (hard_smp_processor_id() >= NR_CPUS) { + int cpu = hard_smp_processor_id(); + + if (cpu >= NR_CPUS) { prom_printf("Serious problem, boot cpu id >= NR_CPUS\n"); prom_halt(); } - current_thread_info()->cpu = hard_smp_processor_id(); + current_thread_info()->cpu = cpu; + __local_per_cpu_offset = __per_cpu_offset(cpu); cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), phys_cpu_present_map); @@ -1173,12 +1162,9 @@ void __init setup_per_cpu_areas(void) { unsigned long goal, size, i; char *ptr; - /* Created by linker magic */ - extern char __per_cpu_start[], __per_cpu_end[]; /* Copy section for each CPU (we discard the original) */ - goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); - + goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); #ifdef CONFIG_MODULES if (goal < PERCPU_ENOUGH_ROOM) goal = PERCPU_ENOUGH_ROOM; @@ -1187,31 +1173,10 @@ void __init setup_per_cpu_areas(void) for (size = 1UL; size < goal; size <<= 1UL) __per_cpu_shift++; - /* Make sure the resulting __per_cpu_base value - * will fit in the 43-bit sign extended IMMU - * TSB register. - */ - ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE, - (unsigned long) __per_cpu_start); + ptr = alloc_bootmem(size * NR_CPUS); __per_cpu_base = ptr - __per_cpu_start; - if ((__per_cpu_shift < PAGE_SHIFT) || - (__per_cpu_base & ~PAGE_MASK) || - (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) { - prom_printf("PER_CPU: Invalid layout, " - "ptr[%p] shift[%lx] base[%lx]\n", - ptr, __per_cpu_shift, __per_cpu_base); - prom_halt(); - } - for (i = 0; i < NR_CPUS; i++, ptr += size) memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - - /* Finally, load in the boot cpu's base value. - * We abuse the IMMU TSB register for trap handler - * entry and exit loading of %g5. That is why it - * has to be page aligned. - */ - cpu_setup_percpu_base(hard_smp_processor_id()); } diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 782d8c4973e4..18c333f841e3 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -287,21 +287,18 @@ do_unlock: wrpr %g0, 0, %wstate wrpr %g0, 0, %tl - /* Setup the trap globals, then we can resurface. */ - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate + /* Load TBA, then we can resurface. */ sethi %hi(sparc64_ttable_tl0), %g5 wrpr %g5, %tba - mov %o2, %g6 - wrpr %o1, 0x0, %pstate ldx [%g6 + TI_TASK], %g4 wrpr %g0, 0, %wstate call init_irqwork_curcpu nop + call init_cur_cpu_trap + nop /* Start using proper page size encodings in ctx register. */ sethi %hi(sparc64_kern_pri_context), %g3 diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 8d44ae5a15e3..f47f4874253c 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -2130,7 +2130,22 @@ void do_getpsr(struct pt_regs *regs) } } +struct trap_per_cpu trap_block[NR_CPUS]; + +/* This can get invoked before sched_init() so play it super safe + * and use hard_smp_processor_id(). + */ +void init_cur_cpu_trap(void) +{ + int cpu = hard_smp_processor_id(); + struct trap_per_cpu *p = &trap_block[cpu]; + + p->thread = current_thread_info(); + p->pgd_paddr = 0; +} + extern void thread_info_offsets_are_bolixed_dave(void); +extern void trap_per_cpu_offsets_are_bolixed_dave(void); /* Only invoked on boot processor. */ void __init trap_init(void) @@ -2165,6 +2180,10 @@ void __init trap_init(void) (TI_FPREGS & (64 - 1))) thread_info_offsets_are_bolixed_dave(); + if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) || + TRAP_PER_CPU_PGD_PADDR != offsetof(struct trap_per_cpu, pgd_paddr)) + trap_per_cpu_offsets_are_bolixed_dave(); + /* Attach to the address space of init_task. On SMP we * do this in smp.c:smp_callin for other cpus. */ diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index 44b9e6fed09f..50752c518773 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -36,6 +36,15 @@ tsb_miss_itlb: nop tsb_miss_page_table_walk: + /* This clobbers %g1 and %g6, preserve them... */ + mov %g1, %g5 + mov %g6, %g2 + + TRAP_LOAD_PGD_PHYS + + mov %g2, %g6 + mov %g5, %g1 + USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) tsb_reload: @@ -112,15 +121,20 @@ winfix_trampoline: * %o0: page table physical address * %o1: TSB address */ + .align 32 .globl tsb_context_switch tsb_context_switch: - wrpr %g0, PSTATE_MG | PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV, %pstate + rdpr %pstate, %o5 + wrpr %o5, PSTATE_IE, %pstate - /* Set page table base alternate global. */ - mov %o0, %g7 + ldub [%g6 + TI_CPU], %o3 + sethi %hi(trap_block), %o4 + sllx %o3, TRAP_BLOCK_SZ_SHIFT, %o3 + or %o4, %lo(trap_block), %o4 + add %o4, %o3, %o4 + stx %o0, [%o4 + TRAP_PER_CPU_PGD_PADDR] - /* XXX can this happen? */ - brz,pn %o1, 9f + brgez %o1, 9f nop /* Lock TSB into D-TLB. */ @@ -163,7 +177,7 @@ tsb_context_switch: membar #Sync 9: - wrpr %g0, PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE, %pstate + wrpr %o5, %pstate retl mov %o2, %o0 diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 56f060c8fbf0..2fb7a33993c0 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -128,7 +128,7 @@ tl0_flushw: FLUSH_WINDOW_TRAP tl0_resv104: BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107) .globl tl0_solaris tl0_solaris: SOLARIS_SYSCALL_TRAP -tl0_netbsd: NETBSD_SYSCALL_TRAP +tl0_resv109: BTRAP(0x109) tl0_resv10a: BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) BTRAP(0x10e) tl0_resv10f: BTRAP(0x10f) tl0_linux32: LINUX_32BIT_SYSCALL_TRAP diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index f5d93aa99cbb..de588036df43 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -39,6 +39,7 @@ set_pcontext: */ .globl fill_fixup, spill_fixup fill_fixup: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 or %g4, FAULT_CODE_WINFIXUP, %g4 @@ -84,11 +85,7 @@ fill_fixup: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ldx [%g6 + TI_TASK], %g4 -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g1, %g2) /* This is the same as below, except we handle this a bit special * since we must preserve %l5 and %l6, see comment above. @@ -107,6 +104,7 @@ fill_fixup: * do not touch %g7 or %g2 so we handle the two cases fine. */ spill_fixup: + TRAP_LOAD_THREAD_REG ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -182,6 +180,7 @@ winfix_mna: wrpr %g3, %tnpc done fill_fixup_mna: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_mna_from_user_common @@ -209,17 +208,14 @@ fill_fixup_mna: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g1, %g2) call mem_address_unaligned add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_mna: + TRAP_LOAD_THREAD_REG ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -287,6 +283,7 @@ winfix_dax: wrpr %g3, %tnpc done fill_fixup_dax: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_dax_from_user_common @@ -314,17 +311,14 @@ fill_fixup_dax: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g1, %g2) call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_dax: + TRAP_LOAD_THREAD_REG ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 22791f29552e..a87394824ec2 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -295,12 +295,10 @@ cheetah_patch_cachetlbops: * %g1 address arg 1 (tlb page and range flushes) * %g7 address arg 2 (tlb range flush only) * - * %g6 ivector table, don't touch - * %g2 scratch 1 - * %g3 scratch 2 - * %g4 scratch 3 - * - * TODO: Make xcall TLB range flushes use the tricks above... -DaveM + * %g6 scratch 1 + * %g2 scratch 2 + * %g3 scratch 3 + * %g4 scratch 4 */ .align 32 .globl xcall_flush_tlb_mm diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index f7c0faede8b8..6c57cbb9a7d1 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -1,12 +1,15 @@ /* cpudata.h: Per-cpu parameters. * - * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com) + * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net) */ #ifndef _SPARC64_CPUDATA_H #define _SPARC64_CPUDATA_H +#ifndef __ASSEMBLY__ + #include +#include typedef struct { /* Dcache line 1 */ @@ -32,4 +35,85 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) #define local_cpu_data() __get_cpu_var(__cpu_data) +/* Trap handling code needs to get at a few critical values upon + * trap entry and to process TSB misses. These cannot be in the + * per_cpu() area as we really need to lock them into the TLB and + * thus make them part of the main kernel image. As a result we + * try to make this as small as possible. + * + * This is padded out and aligned to 64-bytes to avoid false sharing + * on SMP. + */ + +/* If you modify the size of this structure, please update + * TRAP_BLOCK_SZ_SHIFT below. + */ +struct thread_info; +struct trap_per_cpu { +/* D-cache line 1 */ + struct thread_info *thread; + unsigned long pgd_paddr; + unsigned long __pad1[2]; + +/* D-cache line 2 */ + unsigned long __pad2[4]; +} __attribute__((aligned(64))); +extern struct trap_per_cpu trap_block[NR_CPUS]; +extern void init_cur_cpu_trap(void); +extern void per_cpu_patch(void); + +#endif /* !(__ASSEMBLY__) */ + +#define TRAP_PER_CPU_THREAD 0x00 +#define TRAP_PER_CPU_PGD_PADDR 0x08 + +#define TRAP_BLOCK_SZ_SHIFT 6 + +/* Clobbers %g1, loads %g6 with local processor's cpuid */ +#define __GET_CPUID \ + ba,pt %xcc, __get_cpu_id; \ + rd %pc, %g1; + +/* Clobbers %g1, current address space PGD phys address into %g7. */ +#define TRAP_LOAD_PGD_PHYS \ + __GET_CPUID \ + sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ + sethi %hi(trap_block), %g7; \ + or %g7, %lo(trap_block), %g7; \ + add %g7, %g6, %g7; \ + ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; + +/* Clobbers %g1, loads local processor's IRQ work area into %g6. */ +#define TRAP_LOAD_IRQ_WORK \ + __GET_CPUID \ + sethi %hi(__irq_work), %g1; \ + sllx %g6, 6, %g6; \ + or %g1, %lo(__irq_work), %g1; \ + add %g1, %g6, %g6; + +/* Clobbers %g1, loads %g6 with current thread info pointer. */ +#define TRAP_LOAD_THREAD_REG \ + __GET_CPUID \ + sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ + sethi %hi(trap_block), %g1; \ + or %g1, %lo(trap_block), %g1; \ + ldx [%g1 + %g6], %g6; + +/* Given the current thread info pointer in %g6, load the per-cpu + * area base of the current processor into %g5. REG1 and REG2 are + * clobbered. + */ +#ifdef CONFIG_SMP +#define LOAD_PER_CPU_BASE(REG1, REG2) \ + ldub [%g6 + TI_CPU], REG1; \ + sethi %hi(__per_cpu_shift), %g5; \ + sethi %hi(__per_cpu_base), REG2; \ + ldx [%g5 + %lo(__per_cpu_shift)], %g5; \ + ldx [REG2 + %lo(__per_cpu_base)], REG2; \ + sllx REG1, %g5, %g5; \ + add %g5, REG2, %g5; +#else +#define LOAD_PER_CPU_BASE(REG1, REG2) +#endif + #endif /* _SPARC64_CPUDATA_H */ diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index af254e581834..26c0807af3e4 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -209,6 +209,8 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ /* so that ASI is only written if it changes, think again. */ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ : : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\ + trap_block[current_thread_info()->cpu].thread = \ + task_thread_info(next); \ __asm__ __volatile__( \ "mov %%g4, %%g7\n\t" \ "wrpr %%g0, 0x95, %%pstate\n\t" \ diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h index 2784f80094c3..f557db4faf84 100644 --- a/include/asm-sparc64/ttable.h +++ b/include/asm-sparc64/ttable.h @@ -109,14 +109,14 @@ nop;nop;nop; #define TRAP_UTRAP(handler,lvl) \ - ldx [%g6 + TI_UTRAPS], %g1; \ - sethi %hi(109f), %g7; \ - brz,pn %g1, utrap; \ - or %g7, %lo(109f), %g7; \ - ba,pt %xcc, utrap; \ -109: ldx [%g1 + handler*8], %g1; \ - ba,pt %xcc, utrap_ill; \ - mov lvl, %o1; + mov handler, %g3; \ + ba,pt %xcc, utrap_trap; \ + mov lvl, %g4; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; #ifdef CONFIG_SUNOS_EMUL #define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table) @@ -136,8 +136,6 @@ #else #define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall) #endif -/* FIXME: Write these actually */ -#define NETBSD_SYSCALL_TRAP TRAP(netbsd_syscall) #define BREAKPOINT_TRAP TRAP(breakpoint_trap) #define TRAP_IRQ(routine, level) \ -- cgit v1.2.3-59-g8ed1b From a8b900d801697609d1b56cc9c110148c64678068 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jan 2006 18:33:37 -0800 Subject: [SPARC64]: Kill sole argument passed to setup_tba(). No longer used, and move extern declaration to a header file. Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 2 +- arch/sparc64/mm/init.c | 11 ++--------- include/asm-sparc64/cpudata.h | 1 + 3 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 82ce5bced9c7..2988be85147c 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -454,7 +454,7 @@ setup_trap_table: restore .globl setup_tba -setup_tba: /* i0 = is_starfire */ +setup_tba: save %sp, -192, %sp /* The boot processor is the only cpu which invokes this diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index f4d22ccb4cf0..20e7af552ce4 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1092,15 +1092,8 @@ void __init paging_init(void) inherit_prom_mappings(); - /* Ok, we can use our TLB miss and window trap handlers safely. - * We need to do a quick peek here to see if we are on StarFire - * or not, so setup_tba can setup the IRQ globals correctly (it - * needs to get the hard smp processor id correctly). - */ - { - extern void setup_tba(int); - setup_tba(this_is_starfire); - } + /* Ok, we can use our TLB miss and window trap handlers safely. */ + setup_tba(); __flush_tlb_all(); diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 6c57cbb9a7d1..16d628913837 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -61,6 +61,7 @@ struct trap_per_cpu { extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(void); extern void per_cpu_patch(void); +extern void setup_tba(void); #endif /* !(__ASSEMBLY__) */ -- cgit v1.2.3-59-g8ed1b From 86b818687d4894063ecd1190e54717a0cce8c009 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 31 Jan 2006 18:34:51 -0800 Subject: [SPARC64]: Fix race in LOAD_PER_CPU_BASE() Since we use %g5 itself as a temporary, it can get clobbered if we take an interrupt mid-stream and thus cause end up with the final %g5 value too early as a result of rtrap processing. Set %g5 at the very end, atomically, to avoid this problem. Signed-off-by: David S. Miller --- arch/sparc64/kernel/etrap.S | 4 ++-- arch/sparc64/kernel/rtrap.S | 2 +- arch/sparc64/kernel/winfixup.S | 6 +++--- include/asm-sparc64/cpudata.h | 19 ++++++++++++------- 4 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index db7681017299..d974d18b15be 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -100,7 +100,7 @@ etrap_irq: stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 - LOAD_PER_CPU_BASE(%g4, %g3) + LOAD_PER_CPU_BASE(%g4, %g3, %l1) jmpl %l2 + 0x4, %g0 ldx [%g6 + TI_TASK], %g4 @@ -250,7 +250,7 @@ scetrap: stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] mov %l6, %g6 stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] - LOAD_PER_CPU_BASE(%g4, %g3) + LOAD_PER_CPU_BASE(%g4, %g3, %l1) ldx [%g6 + TI_TASK], %g4 done diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index 89794ebdcbcf..64bc03610bc6 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -226,7 +226,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 brz,pt %l3, 1f nop /* Must do this before thread reg is clobbered below. */ - LOAD_PER_CPU_BASE(%g6, %g7) + LOAD_PER_CPU_BASE(%i0, %i1, %i2) 1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index c0545d089c96..ade991b7d079 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -86,7 +86,7 @@ fill_fixup: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ldx [%g6 + TI_TASK], %g4 - LOAD_PER_CPU_BASE(%g1, %g2) + LOAD_PER_CPU_BASE(%g1, %g2, %g3) /* This is the same as below, except we handle this a bit special * since we must preserve %l5 and %l6, see comment above. @@ -209,7 +209,7 @@ fill_fixup_mna: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. - LOAD_PER_CPU_BASE(%g1, %g2) + LOAD_PER_CPU_BASE(%g1, %g2, %g3) call mem_address_unaligned add %sp, PTREGS_OFF, %o0 @@ -312,7 +312,7 @@ fill_fixup_dax: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. - LOAD_PER_CPU_BASE(%g1, %g2) + LOAD_PER_CPU_BASE(%g1, %g2, %g3) call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 16d628913837..f83768883e98 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -101,20 +101,25 @@ extern void setup_tba(void); ldx [%g1 + %g6], %g6; /* Given the current thread info pointer in %g6, load the per-cpu - * area base of the current processor into %g5. REG1 and REG2 are + * area base of the current processor into %g5. REG1, REG2, and REG3 are * clobbered. + * + * You absolutely cannot use %g5 as a temporary in this code. The + * reason is that traps can happen during execution, and return from + * trap will load the fully resolved %g5 per-cpu base. This can corrupt + * the calculations done by the macro mid-stream. */ #ifdef CONFIG_SMP -#define LOAD_PER_CPU_BASE(REG1, REG2) \ +#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \ ldub [%g6 + TI_CPU], REG1; \ - sethi %hi(__per_cpu_shift), %g5; \ + sethi %hi(__per_cpu_shift), REG3; \ sethi %hi(__per_cpu_base), REG2; \ - ldx [%g5 + %lo(__per_cpu_shift)], %g5; \ + ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ ldx [REG2 + %lo(__per_cpu_base)], REG2; \ - sllx REG1, %g5, %g5; \ - add %g5, REG2, %g5; + sllx REG1, REG3, REG3; \ + add REG3, REG2, %g5; #else -#define LOAD_PER_CPU_BASE(REG1, REG2) +#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) #endif #endif /* _SPARC64_CPUDATA_H */ -- cgit v1.2.3-59-g8ed1b From 92704a1c63c3b481870d02636d0b5a70c7e21cd1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 26 Feb 2006 23:27:19 -0800 Subject: [SPARC64]: Refine code sequences to get the cpu id. On uniprocessor, it's always zero for optimize that. On SMP, the jmpl to the stub kills the return address stack in the cpu branch prediction logic, so expand the code sequence inline and use a code patching section to fix things up. This also always better and explicit register selection, which will be taken advantage of in a future changeset. The hard_smp_processor_id() function is big, so do not inline it. Fix up tests for Jalapeno to also test for Serrano chips too. These tests want "jbus Ultra-IIIi" cases to match, so that is what we should test for. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 84 +++--------------------------------- arch/sparc64/kernel/irq.c | 4 +- arch/sparc64/kernel/setup.c | 56 +++++++++++++++++++++++- arch/sparc64/kernel/smp.c | 9 ++-- arch/sparc64/kernel/traps.c | 4 +- arch/sparc64/kernel/vmlinux.lds.S | 3 ++ include/asm-sparc64/cpudata.h | 89 ++++++++++++++++++++++++++++++--------- include/asm-sparc64/head.h | 1 + include/asm-sparc64/smp.h | 28 +----------- 9 files changed, 144 insertions(+), 134 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 563fa4ec33f8..b3511ff5d04a 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1628,84 +1628,10 @@ __flushw_user: 2: retl nop - /* Read cpu ID from hardware, return in %g6. - * (callers_pc - 4) is in %g1. Patched at boot time. - * - * Default is spitfire implementation. - * - * The instruction sequence needs to be 5 instructions - * in order to fit the longest implementation, which is - * currently starfire. - */ - .align 32 - .globl __get_cpu_id -__get_cpu_id: - ldxa [%g0] ASI_UPA_CONFIG, %g6 - srlx %g6, 17, %g6 - jmpl %g1 + 0x4, %g0 - and %g6, 0x1f, %g6 - nop - -__get_cpu_id_cheetah_safari: - ldxa [%g0] ASI_SAFARI_CONFIG, %g6 - srlx %g6, 17, %g6 - jmpl %g1 + 0x4, %g0 - and %g6, 0x3ff, %g6 - nop - -__get_cpu_id_cheetah_jbus: - ldxa [%g0] ASI_JBUS_CONFIG, %g6 - srlx %g6, 17, %g6 - jmpl %g1 + 0x4, %g0 - and %g6, 0x1f, %g6 - nop - -__get_cpu_id_starfire: - sethi %hi(0x1fff40000d0 >> 9), %g6 - sllx %g6, 9, %g6 - or %g6, 0xd0, %g6 - jmpl %g1 + 0x4, %g0 - lduwa [%g6] ASI_PHYS_BYPASS_EC_E, %g6 - - .globl per_cpu_patch -per_cpu_patch: - sethi %hi(this_is_starfire), %o0 - lduw [%o0 + %lo(this_is_starfire)], %o1 - sethi %hi(__get_cpu_id_starfire), %o0 - brnz,pn %o1, 10f - or %o0, %lo(__get_cpu_id_starfire), %o0 - sethi %hi(tlb_type), %o0 - lduw [%o0 + %lo(tlb_type)], %o1 - brz,pt %o1, 11f - nop - rdpr %ver, %o0 - srlx %o0, 32, %o0 - sethi %hi(0x003e0016), %o1 - or %o1, %lo(0x003e0016), %o1 - cmp %o0, %o1 - sethi %hi(__get_cpu_id_cheetah_jbus), %o0 - be,pn %icc, 10f - or %o0, %lo(__get_cpu_id_cheetah_jbus), %o0 - sethi %hi(__get_cpu_id_cheetah_safari), %o0 - or %o0, %lo(__get_cpu_id_cheetah_safari), %o0 -10: - sethi %hi(__get_cpu_id), %o1 - or %o1, %lo(__get_cpu_id), %o1 - lduw [%o0 + 0x00], %o2 - stw %o2, [%o1 + 0x00] - flush %o1 + 0x00 - lduw [%o0 + 0x04], %o2 - stw %o2, [%o1 + 0x04] - flush %o1 + 0x04 - lduw [%o0 + 0x08], %o2 - stw %o2, [%o1 + 0x08] - flush %o1 + 0x08 - lduw [%o0 + 0x0c], %o2 - stw %o2, [%o1 + 0x0c] - flush %o1 + 0x0c - lduw [%o0 + 0x10], %o2 - stw %o2, [%o1 + 0x10] - flush %o1 + 0x10 -11: +#ifdef CONFIG_SMP + .globl hard_smp_processor_id +hard_smp_processor_id: + __GET_CPUID(%o0) retl nop +#endif diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 3e48af2769d4..d069a6feb535 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_SMP static void distribute_irqs(void); @@ -153,7 +154,8 @@ void enable_irq(unsigned int irq) unsigned long ver; __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { + if ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID) { /* We set it to our JBUS ID. */ __asm__ __volatile__("ldxa [%%g0] %1, %0" : "=r" (tid) diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 59a70301a6cf..f751d11926bc 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -490,6 +490,58 @@ void register_prom_callbacks(void) "' linux-.soft2 to .soft2"); } +static void __init per_cpu_patch(void) +{ +#ifdef CONFIG_SMP + struct cpuid_patch_entry *p; + unsigned long ver; + int is_jbus; + + if (tlb_type == spitfire && !this_is_starfire) + return; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + is_jbus = ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID); + + p = &__cpuid_patch; + while (p < &__cpuid_patch_end) { + unsigned long addr = p->addr; + unsigned int *insns; + + switch (tlb_type) { + case spitfire: + insns = &p->starfire[0]; + break; + case cheetah: + case cheetah_plus: + if (is_jbus) + insns = &p->cheetah_jbus[0]; + else + insns = &p->cheetah_safari[0]; + break; + default: + prom_printf("Unknown cpu type, halting.\n"); + prom_halt(); + }; + + *(unsigned int *) (addr + 0) = insns[0]; + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 4) = insns[1]; + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + *(unsigned int *) (addr + 8) = insns[2]; + __asm__ __volatile__("flush %0" : : "r" (addr + 8)); + + *(unsigned int *) (addr + 12) = insns[3]; + __asm__ __volatile__("flush %0" : : "r" (addr + 12)); + + p++; + } +#endif +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -507,8 +559,8 @@ void __init setup_arch(char **cmdline_p) /* Work out if we are starfire early on */ check_if_starfire(); - /* Now we know enough to patch the __get_cpu_id() - * trampoline used by trap code. + /* Now we know enough to patch the get_cpuid sequences + * used by trap code. */ per_cpu_patch(); diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 0e7552546d36..16b8eca9754e 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -424,7 +424,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { u64 pstate, ver; - int nack_busy_id, is_jalapeno; + int nack_busy_id, is_jbus; if (cpus_empty(mask)) return; @@ -434,7 +434,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas * derivative processor. */ __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - is_jalapeno = ((ver >> 32) == 0x003e0016); + is_jbus = ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID); __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); @@ -459,7 +460,7 @@ retry: for_each_cpu_mask(i, mask) { u64 target = (i << 14) | 0x70; - if (!is_jalapeno) + if (!is_jbus) target |= (nack_busy_id << 24); __asm__ __volatile__( "stxa %%g0, [%0] %1\n\t" @@ -512,7 +513,7 @@ retry: for_each_cpu_mask(i, mask) { u64 check_mask; - if (is_jalapeno) + if (is_jbus) check_mask = (0x2UL << (2*i)); else check_mask = (0x2UL << diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 7e52e8972668..1c4744c047ab 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -38,6 +38,7 @@ #include #include #include +#include #ifdef CONFIG_KMOD #include #endif @@ -788,7 +789,8 @@ void __init cheetah_ecache_flush_init(void) cheetah_error_log[i].afsr = CHAFSR_INVALID; __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { + if ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID) { cheetah_error_table = &__jalapeno_error_table[0]; cheetah_afsr_errors = JPAFSR_ERRORS; } else if ((ver >> 32) == 0x003e0015) { diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 71b943f1c9b1..1639d9c935c3 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -74,6 +74,9 @@ SECTIONS __tsb_phys_patch = .; .tsb_phys_patch : { *(.tsb_phys_patch) } __tsb_phys_patch_end = .; + __cpuid_patch = .; + .cpuid_patch : { *(.cpuid_patch) } + __cpuid_patch_end = .; . = ALIGN(8192); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index f83768883e98..da54b4f35403 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -60,9 +60,18 @@ struct trap_per_cpu { } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(void); -extern void per_cpu_patch(void); extern void setup_tba(void); +#ifdef CONFIG_SMP +struct cpuid_patch_entry { + unsigned int addr; + unsigned int cheetah_safari[4]; + unsigned int cheetah_jbus[4]; + unsigned int starfire[4]; +}; +extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; +#endif + #endif /* !(__ASSEMBLY__) */ #define TRAP_PER_CPU_THREAD 0x00 @@ -70,35 +79,58 @@ extern void setup_tba(void); #define TRAP_BLOCK_SZ_SHIFT 6 -/* Clobbers %g1, loads %g6 with local processor's cpuid */ -#define __GET_CPUID \ - ba,pt %xcc, __get_cpu_id; \ - rd %pc, %g1; +#ifdef CONFIG_SMP + +#define __GET_CPUID(REG) \ + /* Spitfire implementation (default). */ \ +661: ldxa [%g0] ASI_UPA_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x1f, REG; \ + nop; \ + .section .cpuid_patch, "ax"; \ + /* Instruction location. */ \ + .word 661b; \ + /* Cheetah Safari implementation. */ \ + ldxa [%g0] ASI_SAFARI_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x3ff, REG; \ + nop; \ + /* Cheetah JBUS implementation. */ \ + ldxa [%g0] ASI_JBUS_CONFIG, REG; \ + srlx REG, 17, REG; \ + and REG, 0x1f, REG; \ + nop; \ + /* Starfire implementation. */ \ + sethi %hi(0x1fff40000d0 >> 9), REG; \ + sllx REG, 9, REG; \ + or REG, 0xd0, REG; \ + lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\ + .previous; /* Clobbers %g1, current address space PGD phys address into %g7. */ #define TRAP_LOAD_PGD_PHYS \ - __GET_CPUID \ - sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ + __GET_CPUID(%g1) \ sethi %hi(trap_block), %g7; \ + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \ or %g7, %lo(trap_block), %g7; \ - add %g7, %g6, %g7; \ + add %g7, %g1, %g7; \ ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; /* Clobbers %g1, loads local processor's IRQ work area into %g6. */ #define TRAP_LOAD_IRQ_WORK \ - __GET_CPUID \ - sethi %hi(__irq_work), %g1; \ - sllx %g6, 6, %g6; \ - or %g1, %lo(__irq_work), %g1; \ - add %g1, %g6, %g6; + __GET_CPUID(%g1) \ + sethi %hi(__irq_work), %g6; \ + sllx %g1, 6, %g1; \ + or %g6, %lo(__irq_work), %g6; \ + add %g6, %g1, %g6; /* Clobbers %g1, loads %g6 with current thread info pointer. */ #define TRAP_LOAD_THREAD_REG \ - __GET_CPUID \ - sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ - sethi %hi(trap_block), %g1; \ - or %g1, %lo(trap_block), %g1; \ - ldx [%g1 + %g6], %g6; + __GET_CPUID(%g1) \ + sethi %hi(trap_block), %g6; \ + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \ + or %g6, %lo(trap_block), %g6; \ + ldx [%g6 + %g1], %g6; /* Given the current thread info pointer in %g6, load the per-cpu * area base of the current processor into %g5. REG1, REG2, and REG3 are @@ -109,7 +141,6 @@ extern void setup_tba(void); * trap will load the fully resolved %g5 per-cpu base. This can corrupt * the calculations done by the macro mid-stream. */ -#ifdef CONFIG_SMP #define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \ ldub [%g6 + TI_CPU], REG1; \ sethi %hi(__per_cpu_shift), REG3; \ @@ -118,8 +149,26 @@ extern void setup_tba(void); ldx [REG2 + %lo(__per_cpu_base)], REG2; \ sllx REG1, REG3, REG3; \ add REG3, REG2, %g5; + #else + +/* Uniprocessor versions, we know the cpuid is zero. */ +#define TRAP_LOAD_PGD_PHYS \ + sethi %hi(trap_block), %g7; \ + or %g7, %lo(trap_block), %g7; \ + ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; + +#define TRAP_LOAD_IRQ_WORK \ + sethi %hi(__irq_work), %g6; \ + or %g6, %lo(__irq_work), %g6; + +#define TRAP_LOAD_THREAD_REG \ + sethi %hi(trap_block), %g6; \ + ldx [%g6 + %lo(trap_block)], %g6; + +/* No per-cpu areas on uniprocessor, so no need to load %g5. */ #define LOAD_PER_CPU_BASE(REG1, REG2, REG3) -#endif + +#endif /* !(CONFIG_SMP) */ #endif /* _SPARC64_CPUDATA_H */ diff --git a/include/asm-sparc64/head.h b/include/asm-sparc64/head.h index 0abd3a674e8f..731c842f3d11 100644 --- a/include/asm-sparc64/head.h +++ b/include/asm-sparc64/head.h @@ -10,6 +10,7 @@ #define __CHEETAH_ID 0x003e0014 #define __JALAPENO_ID 0x003e0016 +#define __SERRANO_ID 0x003e0022 #define CHEETAH_MANUF 0x003e #define CHEETAH_IMPL 0x0014 /* Ultra-III */ diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index 473edb2603ec..ad1d35a7d13f 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -37,33 +37,7 @@ extern cpumask_t phys_cpu_present_map; * General functions that each host system must provide. */ -static __inline__ int hard_smp_processor_id(void) -{ - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - unsigned long cfg, ver; - __asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == 0x003e0016) { - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (cfg) - : "i" (ASI_JBUS_CONFIG)); - return ((cfg >> 17) & 0x1f); - } else { - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (cfg) - : "i" (ASI_SAFARI_CONFIG)); - return ((cfg >> 17) & 0x3ff); - } - } else if (this_is_starfire != 0) { - return starfire_hard_smp_processor_id(); - } else { - unsigned long upaconfig; - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (upaconfig) - : "i" (ASI_UPA_CONFIG)); - return ((upaconfig >> 17) & 0x1f); - } -} - +extern int hard_smp_processor_id(void); #define raw_smp_processor_id() (current_thread_info()->cpu) extern void smp_setup_cpu_possible_map(void); -- cgit v1.2.3-59-g8ed1b From ffe483d55229fadbaf4cc7316d47024a24ecd1a2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Feb 2006 21:55:10 -0800 Subject: [SPARC64]: Add explicit register args to trap state loading macros. This, as well as making the code cleaner, allows a simplification in the TSB miss handling path. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 8 ++-- arch/sparc64/kernel/etrap.S | 10 ++--- arch/sparc64/kernel/rtrap.S | 2 +- arch/sparc64/kernel/tsb.S | 9 +---- arch/sparc64/kernel/winfixup.S | 18 ++++----- include/asm-sparc64/cpudata.h | 88 +++++++++++++++++++++--------------------- 6 files changed, 64 insertions(+), 71 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index b3511ff5d04a..4ca3ea0beaf9 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -50,7 +50,7 @@ do_fpdis: add %g0, %g0, %g0 ba,a,pt %xcc, rtrap_clr_l6 -1: TRAP_LOAD_THREAD_REG +1: TRAP_LOAD_THREAD_REG(%g6, %g1) ldub [%g6 + TI_FPSAVED], %g5 wr %g0, FPRS_FEF, %fprs andcc %g5, FPRS_FEF, %g0 @@ -190,7 +190,7 @@ fp_other_bounce: .globl do_fpother_check_fitos .align 32 do_fpother_check_fitos: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) sethi %hi(fp_other_bounce - 4), %g7 or %g7, %lo(fp_other_bounce - 4), %g7 @@ -378,7 +378,7 @@ do_ivec: sllx %g2, %g4, %g2 sllx %g4, 2, %g4 - TRAP_LOAD_IRQ_WORK + TRAP_LOAD_IRQ_WORK(%g6, %g1) lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */ stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */ @@ -422,7 +422,7 @@ setcc: .globl utrap_trap utrap_trap: /* %g3=handler,%g4=level */ - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_UTRAPS], %g1 brnz,pt %g1, invoke_utrap nop diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index d974d18b15be..b5f6bc52d917 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -31,7 +31,7 @@ .globl etrap, etrap_irq, etraptl1 etrap: rdpr %pil, %g2 etrap_irq: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -100,7 +100,7 @@ etrap_irq: stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 - LOAD_PER_CPU_BASE(%g4, %g3, %l1) + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1) jmpl %l2 + 0x4, %g0 ldx [%g6 + TI_TASK], %g4 @@ -124,7 +124,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. * 0x58 TL4's TT * 0x60 TL */ - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) sub %sp, ((4 * 8) * 4) + 8, %g2 rdpr %tl, %g1 @@ -179,7 +179,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. .align 64 .globl scetrap scetrap: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %pil, %g2 rdpr %tstate, %g1 sllx %g2, 20, %g3 @@ -250,7 +250,7 @@ scetrap: stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] mov %l6, %g6 stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] - LOAD_PER_CPU_BASE(%g4, %g3, %l1) + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1) ldx [%g6 + TI_TASK], %g4 done diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index 64bc03610bc6..61bd45e7697e 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -226,7 +226,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 brz,pt %l3, 1f nop /* Must do this before thread reg is clobbered below. */ - LOAD_PER_CPU_BASE(%i0, %i1, %i2) + LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2) 1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index ff6a79beb98d..28e38b168dda 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -36,14 +36,7 @@ tsb_miss_itlb: nop tsb_miss_page_table_walk: - /* This clobbers %g1 and %g6, preserve them... */ - mov %g1, %g5 - mov %g6, %g2 - - TRAP_LOAD_PGD_PHYS - - mov %g2, %g6 - mov %g5, %g1 + TRAP_LOAD_PGD_PHYS(%g7, %g5) USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index 320a762d0519..211021ae6e8a 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -40,7 +40,7 @@ set_pcontext: */ .globl fill_fixup, spill_fixup fill_fixup: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 or %g4, FAULT_CODE_WINFIXUP, %g4 @@ -86,7 +86,7 @@ fill_fixup: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ldx [%g6 + TI_TASK], %g4 - LOAD_PER_CPU_BASE(%g1, %g2, %g3) + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) /* This is the same as below, except we handle this a bit special * since we must preserve %l5 and %l6, see comment above. @@ -105,7 +105,7 @@ fill_fixup: * do not touch %g7 or %g2 so we handle the two cases fine. */ spill_fixup: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -181,7 +181,7 @@ winfix_mna: wrpr %g3, %tnpc done fill_fixup_mna: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_mna_from_user_common @@ -209,14 +209,14 @@ fill_fixup_mna: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. - LOAD_PER_CPU_BASE(%g1, %g2, %g3) + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) call mem_address_unaligned add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_mna: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -284,7 +284,7 @@ winfix_dax: wrpr %g3, %tnpc done fill_fixup_dax: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_dax_from_user_common @@ -312,14 +312,14 @@ fill_fixup_dax: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. - LOAD_PER_CPU_BASE(%g1, %g2, %g3) + LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_dax: - TRAP_LOAD_THREAD_REG + TRAP_LOAD_THREAD_REG(%g6, %g1) ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index da54b4f35403..c15514f82c33 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -107,67 +107,67 @@ extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\ .previous; -/* Clobbers %g1, current address space PGD phys address into %g7. */ -#define TRAP_LOAD_PGD_PHYS \ - __GET_CPUID(%g1) \ - sethi %hi(trap_block), %g7; \ - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \ - or %g7, %lo(trap_block), %g7; \ - add %g7, %g1, %g7; \ - ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; - -/* Clobbers %g1, loads local processor's IRQ work area into %g6. */ -#define TRAP_LOAD_IRQ_WORK \ - __GET_CPUID(%g1) \ - sethi %hi(__irq_work), %g6; \ - sllx %g1, 6, %g1; \ - or %g6, %lo(__irq_work), %g6; \ - add %g6, %g1, %g6; - -/* Clobbers %g1, loads %g6 with current thread info pointer. */ -#define TRAP_LOAD_THREAD_REG \ - __GET_CPUID(%g1) \ - sethi %hi(trap_block), %g6; \ - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g1; \ - or %g6, %lo(trap_block), %g6; \ - ldx [%g6 + %g1], %g6; - -/* Given the current thread info pointer in %g6, load the per-cpu - * area base of the current processor into %g5. REG1, REG2, and REG3 are +/* Clobbers TMP, current address space PGD phys address into DEST. */ +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(trap_block), DEST; \ + sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ + or DEST, %lo(trap_block), DEST; \ + add DEST, TMP, DEST; \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; + +/* Clobbers TMP, loads local processor's IRQ work area into DEST. */ +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(__irq_work), DEST; \ + sllx TMP, 6, TMP; \ + or DEST, %lo(__irq_work), DEST; \ + add DEST, TMP, DEST; + +/* Clobbers TMP, loads DEST with current thread info pointer. */ +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + __GET_CPUID(TMP) \ + sethi %hi(trap_block), DEST; \ + sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ + or DEST, %lo(trap_block), DEST; \ + ldx [DEST + TMP], DEST; + +/* Given the current thread info pointer in THR, load the per-cpu + * area base of the current processor into DEST. REG1, REG2, and REG3 are * clobbered. * - * You absolutely cannot use %g5 as a temporary in this code. The + * You absolutely cannot use DEST as a temporary in this code. The * reason is that traps can happen during execution, and return from - * trap will load the fully resolved %g5 per-cpu base. This can corrupt + * trap will load the fully resolved DEST per-cpu base. This can corrupt * the calculations done by the macro mid-stream. */ -#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) \ - ldub [%g6 + TI_CPU], REG1; \ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ + ldub [THR + TI_CPU], REG1; \ sethi %hi(__per_cpu_shift), REG3; \ sethi %hi(__per_cpu_base), REG2; \ ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ ldx [REG2 + %lo(__per_cpu_base)], REG2; \ sllx REG1, REG3, REG3; \ - add REG3, REG2, %g5; + add REG3, REG2, DEST; #else /* Uniprocessor versions, we know the cpuid is zero. */ -#define TRAP_LOAD_PGD_PHYS \ - sethi %hi(trap_block), %g7; \ - or %g7, %lo(trap_block), %g7; \ - ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + sethi %hi(trap_block), DEST; \ + or DEST, %lo(trap_block), DEST; \ + ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; -#define TRAP_LOAD_IRQ_WORK \ - sethi %hi(__irq_work), %g6; \ - or %g6, %lo(__irq_work), %g6; +#define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ + sethi %hi(__irq_work), DEST; \ + or DEST, %lo(__irq_work), DEST; -#define TRAP_LOAD_THREAD_REG \ - sethi %hi(trap_block), %g6; \ - ldx [%g6 + %lo(trap_block)], %g6; +#define TRAP_LOAD_THREAD_REG(DEST, TMP) \ + sethi %hi(trap_block), DEST; \ + ldx [DEST + %lo(trap_block)], DEST; -/* No per-cpu areas on uniprocessor, so no need to load %g5. */ -#define LOAD_PER_CPU_BASE(REG1, REG2, REG3) +/* No per-cpu areas on uniprocessor, so no need to load DEST. */ +#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) #endif /* !(CONFIG_SMP) */ -- cgit v1.2.3-59-g8ed1b From d96b81533ba3d5775e45aee6986b2aa33c10801c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 4 Feb 2006 15:40:53 -0800 Subject: [SPARC64]: Add sun4v case to __GET_CPUID() patch tables. Signed-off-by: David S. Miller --- arch/sparc64/kernel/setup.c | 3 +++ include/asm-sparc64/cpudata.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index f751d11926bc..2918ed3eb1ba 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -520,6 +520,9 @@ static void __init per_cpu_patch(void) else insns = &p->cheetah_safari[0]; break; + case hypervisor: + insns = &p->sun4v[0]; + break; default: prom_printf("Unknown cpu type, halting.\n"); prom_halt(); diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index c15514f82c33..4f28a85c1043 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -68,6 +68,7 @@ struct cpuid_patch_entry { unsigned int cheetah_safari[4]; unsigned int cheetah_jbus[4]; unsigned int starfire[4]; + unsigned int sun4v[4]; }; extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; #endif @@ -79,6 +80,8 @@ extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; #define TRAP_BLOCK_SZ_SHIFT 6 +#include + #ifdef CONFIG_SMP #define __GET_CPUID(REG) \ @@ -105,6 +108,11 @@ extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; sllx REG, 9, REG; \ or REG, 0xd0, REG; \ lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\ + /* sun4v implementation. */ \ + mov SCRATCHPAD_CPUID, REG; \ + nop; \ + ldxa [REG] ASI_SCRATCHPAD, REG; \ + nop; \ .previous; /* Clobbers TMP, current address space PGD phys address into DEST. */ -- cgit v1.2.3-59-g8ed1b From 936f482af1743141d637483ec10eb881537c26dc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 5 Feb 2006 21:29:28 -0800 Subject: [SPARC64]: Add initial code to twiddle %gl on trap entry/exit. Instead of setting/clearing PSTATE_AG we have to change the %gl register value on sun4v. Signed-off-by: David S. Miller --- arch/sparc64/kernel/etrap.S | 17 +++++++++++++++-- arch/sparc64/kernel/rtrap.S | 16 +++++++++++++++- arch/sparc64/kernel/setup.c | 20 ++++++++++++++++++++ arch/sparc64/kernel/vmlinux.lds.S | 3 +++ include/asm-sparc64/cpudata.h | 5 +++++ include/asm-sparc64/head.h | 4 ++++ 6 files changed, 62 insertions(+), 3 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index f2556146a735..4d644949ad49 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -102,7 +102,14 @@ etrap_save: save %g2, -STACK_BIAS, %sp 2: mov %g4, %l4 mov %g5, %l5 add %g7, 4, %l2 - wrpr %g0, ETRAP_PSTATE1, %pstate + + /* Go to trap time globals so we can save them. */ +661: wrpr %g0, ETRAP_PSTATE1, %pstate + .section .gl_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + stx %g1, [%sp + PTREGS_OFF + PT_V9_G1] stx %g2, [%sp + PTREGS_OFF + PT_V9_G2] sllx %l7, 24, %l7 @@ -195,9 +202,15 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. rdpr %tt, %g3 stx %g3, [%g2 + STACK_BIAS + 0x78] - wrpr %g1, %tl stx %g1, [%g2 + STACK_BIAS + 0x80] + wrpr %g0, 1, %tl +661: nop + .section .gl_1insn_patch, "ax" + .word 661b + SET_GL(1) + .previous + rdpr %tstate, %g1 sub %g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2 ba,pt %xcc, 1b diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index ecfbbdc56125..e6130956307f 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -230,7 +230,14 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 - wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate + + /* Normal globals are restored, go to trap globals. */ +661: wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate + .section .gl_1insn_patch, "ax" + .word 661b + SET_GL(1) + .previous + ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0 ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1 @@ -304,6 +311,13 @@ user_rtt_fill_fixup: mov %g6, %l1 wrpr %g0, 0x0, %tl wrpr %g0, RTRAP_PSTATE, %pstate + +661: nop + .section .gl_1insn_patch, "ax" + .word 661b + SET_GL(0) + .previous + mov %l1, %g6 ldx [%g6 + TI_TASK], %g4 LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3) diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 2918ed3eb1ba..aaab319ad885 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -545,6 +545,24 @@ static void __init per_cpu_patch(void) #endif } +static void __init gl_patch(void) +{ + struct gl_1insn_patch_entry *p; + + if (tlb_type != hypervisor) + return; + + p = &__gl_1insn_patch; + while (p < &__gl_1insn_patch_end) { + unsigned long addr = p->addr; + + *(unsigned int *) (addr + 0) = p->insn; + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + p++; + } +} + void __init setup_arch(char **cmdline_p) { /* Initialize PROM console and command line. */ @@ -567,6 +585,8 @@ void __init setup_arch(char **cmdline_p) */ per_cpu_patch(); + gl_patch(); + boot_flags_init(*cmdline_p); idprom_init(); diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 1639d9c935c3..482d1ed87f4d 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -77,6 +77,9 @@ SECTIONS __cpuid_patch = .; .cpuid_patch : { *(.cpuid_patch) } __cpuid_patch_end = .; + __gl_1insn_patch = .; + .gl_1insn_patch : { *(.gl_1insn_patch) } + __gl_1insn_patch_end = .; . = ALIGN(8192); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 4f28a85c1043..8666440c89af 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -73,6 +73,11 @@ struct cpuid_patch_entry { extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; #endif +struct gl_1insn_patch_entry { + unsigned int addr; + unsigned int insn; +}; +extern struct gl_1insn_patch_entry __gl_1insn_patch, __gl_1insn_patch_end; #endif /* !(__ASSEMBLY__) */ #define TRAP_PER_CPU_THREAD 0x00 diff --git a/include/asm-sparc64/head.h b/include/asm-sparc64/head.h index 731c842f3d11..ff76c0981b63 100644 --- a/include/asm-sparc64/head.h +++ b/include/asm-sparc64/head.h @@ -4,6 +4,10 @@ #include + /* wrpr %g0, val, %gl */ +#define SET_GL(val) \ + .word 0xa1902000 | val + #define KERNBASE 0x400000 #define PTREGS_OFF (STACK_BIAS + STACKFRAME_SZ) -- cgit v1.2.3-59-g8ed1b From 45fec05f805a113372c9a7ff4c653ac749f6921c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 5 Feb 2006 22:27:28 -0800 Subject: [SPARC64]: Sanitize %pstate writes for sun4v. If we're just switching between different alternate global sets, nop it out on sun4v. Also, get rid of all of the alternate global save/restore in the OBP CIF trampoline code. Signed-off-by: David S. Miller --- arch/sparc64/kernel/ktlb.S | 18 +++- arch/sparc64/kernel/setup.c | 26 +++-- arch/sparc64/kernel/tsb.S | 12 ++- arch/sparc64/kernel/vmlinux.lds.S | 3 + arch/sparc64/mm/ultra.S | 18 +++- arch/sparc64/prom/cif.S | 211 +++----------------------------------- include/asm-sparc64/cpudata.h | 6 ++ 7 files changed, 88 insertions(+), 206 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 9b415ab6db6b..c1335432124e 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -60,8 +60,15 @@ kvmap_itlb_load: retry kvmap_itlb_longpath: - rdpr %pstate, %g5 + +661: rdpr %pstate, %g5 wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %tpc, %g5 ba,pt %xcc, sparc64_realfault_common mov FAULT_CODE_ITLB, %g4 @@ -161,8 +168,15 @@ kvmap_check_obp: nop kvmap_dtlb_longpath: - rdpr %pstate, %g5 + +661: rdpr %pstate, %g5 wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %tl, %g4 cmp %g4, 1 mov TLB_TAG_ACCESS, %g4 diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index aaab319ad885..e22bf5fc92ce 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -547,19 +547,33 @@ static void __init per_cpu_patch(void) static void __init gl_patch(void) { - struct gl_1insn_patch_entry *p; + struct gl_1insn_patch_entry *p1; + struct gl_2insn_patch_entry *p2; if (tlb_type != hypervisor) return; - p = &__gl_1insn_patch; - while (p < &__gl_1insn_patch_end) { - unsigned long addr = p->addr; + p1 = &__gl_1insn_patch; + while (p1 < &__gl_1insn_patch_end) { + unsigned long addr = p1->addr; - *(unsigned int *) (addr + 0) = p->insn; + *(unsigned int *) (addr + 0) = p1->insn; __asm__ __volatile__("flush %0" : : "r" (addr + 0)); - p++; + p1++; + } + + p2 = &__gl_2insn_patch; + while (p2 < &__gl_2insn_patch_end) { + unsigned long addr = p2->addr; + + *(unsigned int *) (addr + 0) = p2->insns[0]; + __asm__ __volatile__("flush %0" : : "r" (addr + 0)); + + *(unsigned int *) (addr + 3) = p2->insns[1]; + __asm__ __volatile__("flush %0" : : "r" (addr + 4)); + + p2++; } } diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index 3b45db98005a..96e63168d8b2 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -82,9 +82,17 @@ tsb_itlb_load: .globl tsb_do_fault tsb_do_fault: cmp %g3, FAULT_CODE_DTLB - rdpr %pstate, %g5 + +661: rdpr %pstate, %g5 + wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous + bne,pn %xcc, tsb_do_itlb_fault - wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate + nop tsb_do_dtlb_fault: rdpr %tl, %g4 diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 482d1ed87f4d..686bf6b3b03f 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -80,6 +80,9 @@ SECTIONS __gl_1insn_patch = .; .gl_1insn_patch : { *(.gl_1insn_patch) } __gl_1insn_patch_end = .; + __gl_2insn_patch = .; + .gl_2insn_patch : { *(.gl_2insn_patch) } + __gl_2insn_patch_end = .; . = ALIGN(8192); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index cac58d66fca9..5dd86ad0d29f 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -444,8 +444,15 @@ xcall_flush_tlb_kernel_range: /* 22 insns */ */ .globl xcall_sync_tick xcall_sync_tick: - rdpr %pstate, %g2 + +661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %pil, %g2 wrpr %g0, 15, %pil sethi %hi(109f), %g7 @@ -468,8 +475,15 @@ xcall_sync_tick: */ .globl xcall_report_regs xcall_report_regs: - rdpr %pstate, %g2 + +661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous + rdpr %pil, %g2 wrpr %g0, 15, %pil sethi %hi(109f), %g7 diff --git a/arch/sparc64/prom/cif.S b/arch/sparc64/prom/cif.S index 29d0ae74aed8..5f27ad779c0c 100644 --- a/arch/sparc64/prom/cif.S +++ b/arch/sparc64/prom/cif.S @@ -1,10 +1,12 @@ /* cif.S: PROM entry/exit assembler trampolines. * - * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) - * Copyright (C) 2005 David S. Miller + * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 2005, 2006 David S. Miller */ #include +#include +#include .text .globl prom_cif_interface @@ -12,78 +14,16 @@ prom_cif_interface: sethi %hi(p1275buf), %o0 or %o0, %lo(p1275buf), %o0 ldx [%o0 + 0x010], %o1 ! prom_cif_stack - save %o1, -0x190, %sp + save %o1, -192, %sp ldx [%i0 + 0x008], %l2 ! prom_cif_handler - rdpr %pstate, %l4 - wrpr %g0, 0x15, %pstate ! save alternate globals - stx %g1, [%sp + 2047 + 0x0b0] - stx %g2, [%sp + 2047 + 0x0b8] - stx %g3, [%sp + 2047 + 0x0c0] - stx %g4, [%sp + 2047 + 0x0c8] - stx %g5, [%sp + 2047 + 0x0d0] - stx %g6, [%sp + 2047 + 0x0d8] - stx %g7, [%sp + 2047 + 0x0e0] - wrpr %g0, 0x814, %pstate ! save interrupt globals - stx %g1, [%sp + 2047 + 0x0e8] - stx %g2, [%sp + 2047 + 0x0f0] - stx %g3, [%sp + 2047 + 0x0f8] - stx %g4, [%sp + 2047 + 0x100] - stx %g5, [%sp + 2047 + 0x108] - stx %g6, [%sp + 2047 + 0x110] - stx %g7, [%sp + 2047 + 0x118] - wrpr %g0, 0x14, %pstate ! save normal globals - stx %g1, [%sp + 2047 + 0x120] - stx %g2, [%sp + 2047 + 0x128] - stx %g3, [%sp + 2047 + 0x130] - stx %g4, [%sp + 2047 + 0x138] - stx %g5, [%sp + 2047 + 0x140] - stx %g6, [%sp + 2047 + 0x148] - stx %g7, [%sp + 2047 + 0x150] - wrpr %g0, 0x414, %pstate ! save mmu globals - stx %g1, [%sp + 2047 + 0x158] - stx %g2, [%sp + 2047 + 0x160] - stx %g3, [%sp + 2047 + 0x168] - stx %g4, [%sp + 2047 + 0x170] - stx %g5, [%sp + 2047 + 0x178] - stx %g6, [%sp + 2047 + 0x180] - stx %g7, [%sp + 2047 + 0x188] - mov %g1, %l0 ! also save to locals, so we can handle - mov %g2, %l1 ! tlb faults later on, when accessing - mov %g3, %l3 ! the stack. - mov %g7, %l5 - wrpr %l4, PSTATE_IE, %pstate ! turn off interrupts + mov %g4, %l0 + mov %g5, %l1 + mov %g6, %l3 call %l2 add %i0, 0x018, %o0 ! prom_args - wrpr %g0, 0x414, %pstate ! restore mmu globals - mov %l0, %g1 - mov %l1, %g2 - mov %l3, %g3 - mov %l5, %g7 - wrpr %g0, 0x14, %pstate ! restore normal globals - ldx [%sp + 2047 + 0x120], %g1 - ldx [%sp + 2047 + 0x128], %g2 - ldx [%sp + 2047 + 0x130], %g3 - ldx [%sp + 2047 + 0x138], %g4 - ldx [%sp + 2047 + 0x140], %g5 - ldx [%sp + 2047 + 0x148], %g6 - ldx [%sp + 2047 + 0x150], %g7 - wrpr %g0, 0x814, %pstate ! restore interrupt globals - ldx [%sp + 2047 + 0x0e8], %g1 - ldx [%sp + 2047 + 0x0f0], %g2 - ldx [%sp + 2047 + 0x0f8], %g3 - ldx [%sp + 2047 + 0x100], %g4 - ldx [%sp + 2047 + 0x108], %g5 - ldx [%sp + 2047 + 0x110], %g6 - ldx [%sp + 2047 + 0x118], %g7 - wrpr %g0, 0x15, %pstate ! restore alternate globals - ldx [%sp + 2047 + 0x0b0], %g1 - ldx [%sp + 2047 + 0x0b8], %g2 - ldx [%sp + 2047 + 0x0c0], %g3 - ldx [%sp + 2047 + 0x0c8], %g4 - ldx [%sp + 2047 + 0x0d0], %g5 - ldx [%sp + 2047 + 0x0d8], %g6 - ldx [%sp + 2047 + 0x0e0], %g7 - wrpr %l4, 0, %pstate ! restore original pstate + mov %l0, %g4 + mov %l1, %g5 + mov %l3, %g6 ret restore @@ -91,135 +31,18 @@ prom_cif_interface: prom_cif_callback: sethi %hi(p1275buf), %o1 or %o1, %lo(p1275buf), %o1 - save %sp, -0x270, %sp - rdpr %pstate, %l4 - wrpr %g0, 0x15, %pstate ! save PROM alternate globals - stx %g1, [%sp + 2047 + 0x0b0] - stx %g2, [%sp + 2047 + 0x0b8] - stx %g3, [%sp + 2047 + 0x0c0] - stx %g4, [%sp + 2047 + 0x0c8] - stx %g5, [%sp + 2047 + 0x0d0] - stx %g6, [%sp + 2047 + 0x0d8] - stx %g7, [%sp + 2047 + 0x0e0] - ! restore Linux alternate globals - ldx [%sp + 2047 + 0x190], %g1 - ldx [%sp + 2047 + 0x198], %g2 - ldx [%sp + 2047 + 0x1a0], %g3 - ldx [%sp + 2047 + 0x1a8], %g4 - ldx [%sp + 2047 + 0x1b0], %g5 - ldx [%sp + 2047 + 0x1b8], %g6 - ldx [%sp + 2047 + 0x1c0], %g7 - wrpr %g0, 0x814, %pstate ! save PROM interrupt globals - stx %g1, [%sp + 2047 + 0x0e8] - stx %g2, [%sp + 2047 + 0x0f0] - stx %g3, [%sp + 2047 + 0x0f8] - stx %g4, [%sp + 2047 + 0x100] - stx %g5, [%sp + 2047 + 0x108] - stx %g6, [%sp + 2047 + 0x110] - stx %g7, [%sp + 2047 + 0x118] - ! restore Linux interrupt globals - ldx [%sp + 2047 + 0x1c8], %g1 - ldx [%sp + 2047 + 0x1d0], %g2 - ldx [%sp + 2047 + 0x1d8], %g3 - ldx [%sp + 2047 + 0x1e0], %g4 - ldx [%sp + 2047 + 0x1e8], %g5 - ldx [%sp + 2047 + 0x1f0], %g6 - ldx [%sp + 2047 + 0x1f8], %g7 - wrpr %g0, 0x14, %pstate ! save PROM normal globals - stx %g1, [%sp + 2047 + 0x120] - stx %g2, [%sp + 2047 + 0x128] - stx %g3, [%sp + 2047 + 0x130] - stx %g4, [%sp + 2047 + 0x138] - stx %g5, [%sp + 2047 + 0x140] - stx %g6, [%sp + 2047 + 0x148] - stx %g7, [%sp + 2047 + 0x150] - ! restore Linux normal globals - ldx [%sp + 2047 + 0x200], %g1 - ldx [%sp + 2047 + 0x208], %g2 - ldx [%sp + 2047 + 0x210], %g3 - ldx [%sp + 2047 + 0x218], %g4 - ldx [%sp + 2047 + 0x220], %g5 - ldx [%sp + 2047 + 0x228], %g6 - ldx [%sp + 2047 + 0x230], %g7 - wrpr %g0, 0x414, %pstate ! save PROM mmu globals - stx %g1, [%sp + 2047 + 0x158] - stx %g2, [%sp + 2047 + 0x160] - stx %g3, [%sp + 2047 + 0x168] - stx %g4, [%sp + 2047 + 0x170] - stx %g5, [%sp + 2047 + 0x178] - stx %g6, [%sp + 2047 + 0x180] - stx %g7, [%sp + 2047 + 0x188] - ! restore Linux mmu globals - ldx [%sp + 2047 + 0x238], %o0 - ldx [%sp + 2047 + 0x240], %o1 - ldx [%sp + 2047 + 0x248], %l2 - ldx [%sp + 2047 + 0x250], %l3 - ldx [%sp + 2047 + 0x258], %l5 - ldx [%sp + 2047 + 0x260], %l6 - ldx [%sp + 2047 + 0x268], %l7 - ! switch to Linux tba - sethi %hi(sparc64_ttable_tl0), %l1 - rdpr %tba, %l0 ! save PROM tba - mov %o0, %g1 - mov %o1, %g2 - mov %l2, %g3 - mov %l3, %g4 - mov %l5, %g5 - mov %l6, %g6 - mov %l7, %g7 - wrpr %l1, %tba ! install Linux tba - wrpr %l4, 0, %pstate ! restore PSTATE + save %sp, -192, %sp + TRAP_LOAD_THREAD_REG(%g6, %g1) + LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %o0) + ldx [%g6 + TI_TASK], %g4 call prom_world - mov %g0, %o0 + mov 0, %o0 ldx [%i1 + 0x000], %l2 call %l2 mov %i0, %o0 mov %o0, %l1 call prom_world - or %g0, 1, %o0 - wrpr %g0, 0x14, %pstate ! interrupts off - ! restore PROM mmu globals - ldx [%sp + 2047 + 0x158], %o0 - ldx [%sp + 2047 + 0x160], %o1 - ldx [%sp + 2047 + 0x168], %l2 - ldx [%sp + 2047 + 0x170], %l3 - ldx [%sp + 2047 + 0x178], %l5 - ldx [%sp + 2047 + 0x180], %l6 - ldx [%sp + 2047 + 0x188], %l7 - wrpr %g0, 0x414, %pstate ! restore PROM mmu globals - mov %o0, %g1 - mov %o1, %g2 - mov %l2, %g3 - mov %l3, %g4 - mov %l5, %g5 - mov %l6, %g6 - mov %l7, %g7 - wrpr %l0, %tba ! restore PROM tba - wrpr %g0, 0x14, %pstate ! restore PROM normal globals - ldx [%sp + 2047 + 0x120], %g1 - ldx [%sp + 2047 + 0x128], %g2 - ldx [%sp + 2047 + 0x130], %g3 - ldx [%sp + 2047 + 0x138], %g4 - ldx [%sp + 2047 + 0x140], %g5 - ldx [%sp + 2047 + 0x148], %g6 - ldx [%sp + 2047 + 0x150], %g7 - wrpr %g0, 0x814, %pstate ! restore PROM interrupt globals - ldx [%sp + 2047 + 0x0e8], %g1 - ldx [%sp + 2047 + 0x0f0], %g2 - ldx [%sp + 2047 + 0x0f8], %g3 - ldx [%sp + 2047 + 0x100], %g4 - ldx [%sp + 2047 + 0x108], %g5 - ldx [%sp + 2047 + 0x110], %g6 - ldx [%sp + 2047 + 0x118], %g7 - wrpr %g0, 0x15, %pstate ! restore PROM alternate globals - ldx [%sp + 2047 + 0x0b0], %g1 - ldx [%sp + 2047 + 0x0b8], %g2 - ldx [%sp + 2047 + 0x0c0], %g3 - ldx [%sp + 2047 + 0x0c8], %g4 - ldx [%sp + 2047 + 0x0d0], %g5 - ldx [%sp + 2047 + 0x0d8], %g6 - ldx [%sp + 2047 + 0x0e0], %g7 - wrpr %l4, 0, %pstate + mov 1, %o0 ret restore %l1, 0, %o0 diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 8666440c89af..998145b92653 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -78,6 +78,12 @@ struct gl_1insn_patch_entry { unsigned int insn; }; extern struct gl_1insn_patch_entry __gl_1insn_patch, __gl_1insn_patch_end; + +struct gl_2insn_patch_entry { + unsigned int addr; + unsigned int insns[2]; +}; +extern struct gl_2insn_patch_entry __gl_2insn_patch, __gl_2insn_patch_end; #endif /* !(__ASSEMBLY__) */ #define TRAP_PER_CPU_THREAD 0x00 -- cgit v1.2.3-59-g8ed1b From d257d5da39a78b32721ca84b2ba7f461f2f7ed7f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 6 Feb 2006 23:44:37 -0800 Subject: [SPARC64]: Initial sun4v TLB miss handling infrastructure. Things are a little tricky because, unlike sun4u, we have to: 1) do a hypervisor trap to do the TLB load. 2) do the TSB lookup calculations by hand Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 1 + arch/sparc64/kernel/ktlb.S | 12 +- arch/sparc64/kernel/sun4v_tlb_miss.S | 219 +++++++++++++++++++++++++++++++++++ arch/sparc64/kernel/tsb.S | 89 +++++++++++--- arch/sparc64/kernel/vmlinux.lds.S | 3 + arch/sparc64/mm/init.c | 24 +++- include/asm-sparc64/cpudata.h | 8 +- include/asm-sparc64/tsb.h | 11 +- 8 files changed, 349 insertions(+), 18 deletions(-) create mode 100644 arch/sparc64/kernel/sun4v_tlb_miss.S (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index 7840271d7aae..03fc0b5b1d98 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -474,6 +474,7 @@ setup_tba: sparc64_boot_end: #include "systbls.S" +#include "sun4v_tlb_miss.S" #include "ktlb.S" #include "tsb.S" #include "etrap.S" diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index c1335432124e..2e55084a0c12 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -16,12 +16,16 @@ .text .align 32 - .globl kvmap_itlb kvmap_itlb: /* g6: TAG TARGET */ mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_IMMU, %g4 + /* sun4v_itlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +kvmap_itlb_4v: + kvmap_itlb_nonlinear: /* Catch kernel NULL pointer calls. */ sethi %hi(PAGE_SIZE), %g5 @@ -94,11 +98,15 @@ kvmap_dtlb_obp: nop .align 32 - .globl kvmap_dtlb kvmap_dtlb: /* %g6: TAG TARGET */ mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_DMMU, %g4 + + /* sun4v_dtlb_miss branches here with the missing virtual + * address already loaded into %g4 + */ +kvmap_dtlb_4v: brgez,pn %g4, kvmap_dtlb_nonlinear nop diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S new file mode 100644 index 000000000000..58ea5dd8573c --- /dev/null +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -0,0 +1,219 @@ +/* sun4v_tlb_miss.S: Sun4v TLB miss handlers. + * + * Copyright (C) 2006 + */ + + .text + .align 32 + +sun4v_itlb_miss: + /* Load CPU ID into %g3. */ + mov SCRATCHPAD_CPUID, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g3 + + /* Load UTSB reg into %g1. */ + ldxa [%g1 + %g1] ASI_SCRATCHPAD, %g1 + + /* Load &trap_block[smp_processor_id()] into %g2. */ + sethi %hi(trap_block), %g2 + or %g2, %lo(trap_block), %g2 + sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + add %g2, %g3, %g2 + + /* Create a TAG TARGET, "(vaddr>>22) | (ctx << 48)", in %g6. + * Branch if kernel TLB miss. The kernel TSB and user TSB miss + * code wants the missing virtual address in %g4, so that value + * cannot be modified through the entirety of this handler. + */ + ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5 + srlx %g4, 22, %g3 + sllx %g5, 48, %g6 + or %g6, %g3, %g6 + brz,pn %g5, kvmap_itlb_4v + nop + + /* Create TSB pointer. This is something like: + * + * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; + * tsb_base = tsb_reg & ~0x7UL; + */ + and %g1, 0x7, %g3 + andn %g1, 0x7, %g1 + mov 512, %g7 + sllx %g7, %g3, %g7 + sub %g7, 1, %g7 + + /* TSB index mask is in %g7, tsb base is in %g1. Compute + * the TSB entry pointer into %g1: + * + * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask); + * tsb_ptr = tsb_base + (tsb_index * 16); + */ + srlx %g4, PAGE_SHIFT, %g3 + and %g3, %g7, %g3 + sllx %g3, 4, %g3 + add %g1, %g3, %g1 + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS, %g2 + cmp %g2, %g6 + sethi %hi(_PAGE_EXEC), %g7 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_ITLB, %g3 + andcc %g3, %g7, %g0 + be,a,pn %xcc, tsb_do_fault + mov FAULT_CODE_ITLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * I-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + * %g6: TAG TARGET (only "CTX << 48" part matters) + */ +sun4v_itlb_load: + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + srlx %g6, 48, %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_IMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + mov %g1, %o0 ! restore %o0 + mov %g2, %o1 ! restore %o1 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_miss: + /* Load CPU ID into %g3. */ + mov SCRATCHPAD_CPUID, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g3 + + /* Load UTSB reg into %g1. */ + ldxa [%g1 + %g1] ASI_SCRATCHPAD, %g1 + + /* Load &trap_block[smp_processor_id()] into %g2. */ + sethi %hi(trap_block), %g2 + or %g2, %lo(trap_block), %g2 + sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + add %g2, %g3, %g2 + + /* Create a TAG TARGET, "(vaddr>>22) | (ctx << 48)", in %g6. + * Branch if kernel TLB miss. The kernel TSB and user TSB miss + * code wants the missing virtual address in %g4, so that value + * cannot be modified through the entirety of this handler. + */ + ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 + srlx %g4, 22, %g3 + sllx %g5, 48, %g6 + or %g6, %g3, %g6 + brz,pn %g5, kvmap_dtlb_4v + nop + + /* Create TSB pointer. This is something like: + * + * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL; + * tsb_base = tsb_reg & ~0x7UL; + */ + and %g1, 0x7, %g3 + andn %g1, 0x7, %g1 + mov 512, %g7 + sllx %g7, %g3, %g7 + sub %g7, 1, %g7 + + /* TSB index mask is in %g7, tsb base is in %g1. Compute + * the TSB entry pointer into %g1: + * + * tsb_index = ((vaddr >> PAGE_SHIFT) & tsb_mask); + * tsb_ptr = tsb_base + (tsb_index * 16); + */ + srlx %g4, PAGE_SHIFT, %g3 + and %g3, %g7, %g3 + sllx %g3, 4, %g3 + add %g1, %g3, %g1 + + /* Load TSB tag/pte into %g2/%g3 and compare the tag. */ + ldda [%g1] ASI_QUAD_LDD_PHYS, %g2 + cmp %g2, %g6 + bne,a,pn %xcc, tsb_miss_page_table_walk + mov FAULT_CODE_ITLB, %g3 + + /* We have a valid entry, make hypervisor call to load + * D-TLB and return from trap. + * + * %g3: PTE + * %g4: vaddr + * %g6: TAG TARGET (only "CTX << 48" part matters) + */ +sun4v_dtlb_load: + mov %o0, %g1 ! save %o0 + mov %o1, %g2 ! save %o1 + mov %o2, %g5 ! save %o2 + mov %o3, %g7 ! save %o3 + mov %g4, %o0 ! vaddr + srlx %g6, 48, %o1 ! ctx + mov %g3, %o2 ! PTE + mov HV_MMU_DMMU, %o3 ! flags + ta HV_MMU_MAP_ADDR_TRAP + mov %g1, %o0 ! restore %o0 + mov %g2, %o1 ! restore %o1 + mov %g5, %o2 ! restore %o2 + mov %g7, %o3 ! restore %o3 + + retry + +sun4v_dtlb_prot: + /* Load CPU ID into %g3. */ + mov SCRATCHPAD_CPUID, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g3 + + /* Load &trap_block[smp_processor_id()] into %g2. */ + sethi %hi(trap_block), %g2 + or %g2, %lo(trap_block), %g2 + sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 + add %g2, %g3, %g2 + + ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g5 + rdpr %tl, %g1 + cmp %g1, 1 + bgu,pn %xcc, winfix_trampoline + nop + ba,pt %xcc, sparc64_realfault_common + mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 + +#define BRANCH_ALWAYS 0x10680000 +#define NOP 0x01000000 +#define SUN4V_DO_PATCH(OLD, NEW) \ + sethi %hi(NEW), %g1; \ + or %g1, %lo(NEW), %g1; \ + sethi %hi(OLD), %g2; \ + or %g2, %lo(OLD), %g2; \ + sub %g1, %g2, %g1; \ + sethi %hi(BRANCH_ALWAYS), %g3; \ + srl %g1, 2, %g1; \ + or %g3, %lo(BRANCH_ALWAYS), %g3; \ + or %g3, %g1, %g3; \ + stw %g3, [%g2]; \ + sethi %hi(NOP), %g3; \ + or %g3, %lo(NOP), %g3; \ + stw %g3, [%g2 + 0x4]; \ + flush %g2; + + .globl sun4v_patch_tlb_handlers + .type sun4v_patch_tlb_handlers,#function +sun4v_patch_tlb_handlers: + SUN4V_DO_PATCH(tl0_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl1_iamiss, sun4v_itlb_miss) + SUN4V_DO_PATCH(tl0_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl1_damiss, sun4v_dtlb_miss) + SUN4V_DO_PATCH(tl0_daprot, sun4v_dtlb_prot) + SUN4V_DO_PATCH(tl1_daprot, sun4v_dtlb_prot) + retl + nop + .size sun4v_patch_tlb_handlers,.-sun4v_patch_tlb_handlers diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index 96e63168d8b2..818bc9e9135a 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -18,30 +18,33 @@ * %g4: available temporary * %g5: available temporary * %g6: TAG TARGET - * %g7: physical address base of the linux page + * %g7: available temporary, will be loaded by us with + * the physical address base of the linux page * tables for the current address space */ - .globl tsb_miss_dtlb tsb_miss_dtlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_DMMU, %g4 ba,pt %xcc, tsb_miss_page_table_walk nop - .globl tsb_miss_itlb tsb_miss_itlb: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_IMMU, %g4 ba,pt %xcc, tsb_miss_page_table_walk nop + /* The sun4v TLB miss handlers jump directly here instead + * of tsb_miss_{d,i}tlb with the missing virtual address + * already loaded into %g4. + */ tsb_miss_page_table_walk: TRAP_LOAD_PGD_PHYS(%g7, %g5) USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) tsb_reload: - TSB_LOCK_TAG(%g1, %g2, %g4) + TSB_LOCK_TAG(%g1, %g2, %g7) /* Load and check PTE. */ ldxa [%g5] ASI_PHYS_USE_EC, %g5 @@ -52,9 +55,9 @@ tsb_reload: * bother putting it into the TSB. */ srlx %g5, 32, %g2 - sethi %hi(_PAGE_ALL_SZ_BITS >> 32), %g4 + sethi %hi(_PAGE_ALL_SZ_BITS >> 32), %g7 + and %g2, %g7, %g2 sethi %hi(_PAGE_SZBITS >> 32), %g7 - and %g2, %g4, %g2 cmp %g2, %g7 bne,a,pn %xcc, tsb_tlb_reload TSB_STORE(%g1, %g0) @@ -68,12 +71,54 @@ tsb_tlb_reload: nop tsb_dtlb_load: - stxa %g5, [%g0] ASI_DTLB_DATA_IN + +661: stxa %g5, [%g0] ASI_DTLB_DATA_IN retry + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_DTLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_dtlb_load + mov %g5, %g3 tsb_itlb_load: - stxa %g5, [%g0] ASI_ITLB_DATA_IN + +661: stxa %g5, [%g0] ASI_ITLB_DATA_IN retry + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous + + /* For sun4v the ASI_ITLB_DATA_IN store and the retry + * instruction get nop'd out and we get here to branch + * to the sun4v tlb load code. The registers are setup + * as follows: + * + * %g4: vaddr + * %g5: PTE + * %g6: TAG + * + * The sun4v TLB load wants the PTE in %g3 so we fix that + * up here. + */ + ba,pt %xcc, sun4v_itlb_load + mov %g5, %g3 /* No valid entry in the page tables, do full fault * processing. @@ -95,10 +140,17 @@ tsb_do_fault: nop tsb_do_dtlb_fault: - rdpr %tl, %g4 - cmp %g4, 1 - mov TLB_TAG_ACCESS, %g4 + rdpr %tl, %g3 + cmp %g3, 1 + +661: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_DMMU, %g5 + .section .gl_2insn_patch, "ax" + .word 661b + mov %g4, %g5 + nop + .previous + be,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB, %g4 ba,pt %xcc, winfix_trampoline @@ -196,12 +248,23 @@ __tsb_context_switch: add %g2, %g1, %g2 stx %o0, [%g2 + TRAP_PER_CPU_PGD_PADDR] - mov TSB_REG, %g1 +661: mov TSB_REG, %g1 stxa %o1, [%g1] ASI_DMMU + .section .gl_2insn_patch, "ax" + .word 661b + mov SCRATCHPAD_UTSBREG1, %g1 + stxa %o1, [%g1] ASI_SCRATCHPAD + .previous + membar #Sync - stxa %o1, [%g1] ASI_IMMU +661: stxa %o1, [%g1] ASI_IMMU membar #Sync + .section .gl_2insn_patch, "ax" + .word 661b + nop + nop + .previous brz %o2, 9f nop diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index 686bf6b3b03f..a09a8a2383dd 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -71,6 +71,9 @@ SECTIONS __con_initcall_end = .; SECURITY_INIT . = ALIGN(4); + __tsb_ldquad_phys_patch = .; + .tsb_ldquad_phys_patch : { *(.tsb_ldquad_phys_patch) } + __tsb_ldquad_phys_patch_end = .; __tsb_phys_patch = .; .tsb_phys_patch : { *(.tsb_phys_patch) } __tsb_phys_patch_end = .; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index ab50cd9618f3..e9aac424877f 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1050,8 +1050,25 @@ unsigned long __init find_ecache_flush_span(unsigned long size) static void __init tsb_phys_patch(void) { + struct tsb_ldquad_phys_patch_entry *pquad; struct tsb_phys_patch_entry *p; + pquad = &__tsb_ldquad_phys_patch; + while (pquad < &__tsb_ldquad_phys_patch_end) { + unsigned long addr = pquad->addr; + + if (tlb_type == hypervisor) + *(unsigned int *) addr = pquad->sun4v_insn; + else + *(unsigned int *) addr = pquad->sun4u_insn; + wmb(); + __asm__ __volatile__("flush %0" + : /* no outputs */ + : "r" (addr)); + + pquad++; + } + p = &__tsb_phys_patch; while (p < &__tsb_phys_patch_end) { unsigned long addr = p->addr; @@ -1069,6 +1086,7 @@ static void __init tsb_phys_patch(void) /* paging_init() sets up the page tables */ extern void cheetah_ecache_flush_init(void); +extern void sun4v_patch_tlb_handlers(void); static unsigned long last_valid_pfn; pgd_t swapper_pg_dir[2048]; @@ -1078,9 +1096,13 @@ void __init paging_init(void) unsigned long end_pfn, pages_avail, shift; unsigned long real_end, i; - if (tlb_type == cheetah_plus) + if (tlb_type == cheetah_plus || + tlb_type == hypervisor) tsb_phys_patch(); + if (tlb_type == hypervisor) + sun4v_patch_tlb_handlers(); + /* Find available physical memory... */ read_obp_memory("available", &pavail[0], &pavail_ents); diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 998145b92653..a3dc4afc4b21 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -6,6 +6,8 @@ #ifndef _SPARC64_CPUDATA_H #define _SPARC64_CPUDATA_H +#include + #ifndef __ASSEMBLY__ #include @@ -57,6 +59,9 @@ struct trap_per_cpu { /* D-cache line 2 */ unsigned long __pad2[4]; + +/* Dcache lines 3 and 4 */ + struct hv_fault_status fault_info; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(void); @@ -88,8 +93,9 @@ extern struct gl_2insn_patch_entry __gl_2insn_patch, __gl_2insn_patch_end; #define TRAP_PER_CPU_THREAD 0x00 #define TRAP_PER_CPU_PGD_PADDR 0x08 +#define TRAP_PER_CPU_FAULT_INFO 0x20 -#define TRAP_BLOCK_SZ_SHIFT 6 +#define TRAP_BLOCK_SZ_SHIFT 7 #include diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h index 44709cde5617..7f3abc32c4dd 100644 --- a/include/asm-sparc64/tsb.h +++ b/include/asm-sparc64/tsb.h @@ -53,6 +53,14 @@ * kernel image, so we don't play these games for swapper_tsb access. */ #ifndef __ASSEMBLY__ +struct tsb_ldquad_phys_patch_entry { + unsigned int addr; + unsigned int sun4u_insn; + unsigned int sun4v_insn; +}; +extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch, + __tsb_ldquad_phys_patch_end; + struct tsb_phys_patch_entry { unsigned int addr; unsigned int insn; @@ -61,9 +69,10 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; #endif #define TSB_LOAD_QUAD(TSB, REG) \ 661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \ - .section .tsb_phys_patch, "ax"; \ + .section .tsb_ldquad_phys_patch, "ax"; \ .word 661b; \ ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \ + ldda [TSB] ASI_QUAD_LDD_PHYS_4V, REG; \ .previous #define TSB_LOAD_TAG_HIGH(TSB, REG) \ -- cgit v1.2.3-59-g8ed1b From df7d6aec96ab98cb182dd5138a85bdc363a9bf0d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 7 Feb 2006 00:00:16 -0800 Subject: [SPARC64]: Rename gl_{1,2}insn_patch --> sun4v_{1,2}insn_patch Signed-off-by: David S. Miller --- arch/sparc64/kernel/etrap.S | 4 ++-- arch/sparc64/kernel/ktlb.S | 4 ++-- arch/sparc64/kernel/rtrap.S | 4 ++-- arch/sparc64/kernel/setup.c | 16 ++++++++-------- arch/sparc64/kernel/tsb.S | 12 ++++++------ arch/sparc64/kernel/vmlinux.lds.S | 12 ++++++------ arch/sparc64/mm/ultra.S | 4 ++-- include/asm-sparc64/cpudata.h | 11 +++++++---- 8 files changed, 35 insertions(+), 32 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index 4d644949ad49..d8c062a1700c 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -105,7 +105,7 @@ etrap_save: save %g2, -STACK_BIAS, %sp /* Go to trap time globals so we can save them. */ 661: wrpr %g0, ETRAP_PSTATE1, %pstate - .section .gl_1insn_patch, "ax" + .section .sun4v_1insn_patch, "ax" .word 661b SET_GL(0) .previous @@ -206,7 +206,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. wrpr %g0, 1, %tl 661: nop - .section .gl_1insn_patch, "ax" + .section .sun4v_1insn_patch, "ax" .word 661b SET_GL(1) .previous diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S index 2e55084a0c12..f6bb2e08964a 100644 --- a/arch/sparc64/kernel/ktlb.S +++ b/arch/sparc64/kernel/ktlb.S @@ -67,7 +67,7 @@ kvmap_itlb_longpath: 661: rdpr %pstate, %g5 wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop @@ -179,7 +179,7 @@ kvmap_dtlb_longpath: 661: rdpr %pstate, %g5 wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index a2fa277da62b..a55d517e76aa 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -234,7 +234,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 /* Normal globals are restored, go to trap globals. */ 661: wrpr %g0, RTRAP_PSTATE_AG_IRQOFF, %pstate - .section .gl_1insn_patch, "ax" + .section .sun4v_1insn_patch, "ax" .word 661b SET_GL(1) .previous @@ -316,7 +316,7 @@ user_rtt_fill_fixup: wrpr %g0, RTRAP_PSTATE, %pstate 661: nop - .section .gl_1insn_patch, "ax" + .section .sun4v_1insn_patch, "ax" .word 661b SET_GL(0) .previous diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 40acac5b8337..6d6178efd587 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -549,16 +549,16 @@ static void __init per_cpu_patch(void) #endif } -static void __init gl_patch(void) +static void __init sun4v_patch(void) { - struct gl_1insn_patch_entry *p1; - struct gl_2insn_patch_entry *p2; + struct sun4v_1insn_patch_entry *p1; + struct sun4v_2insn_patch_entry *p2; if (tlb_type != hypervisor) return; - p1 = &__gl_1insn_patch; - while (p1 < &__gl_1insn_patch_end) { + p1 = &__sun4v_1insn_patch; + while (p1 < &__sun4v_1insn_patch_end) { unsigned long addr = p1->addr; *(unsigned int *) (addr + 0) = p1->insn; @@ -568,8 +568,8 @@ static void __init gl_patch(void) p1++; } - p2 = &__gl_2insn_patch; - while (p2 < &__gl_2insn_patch_end) { + p2 = &__sun4v_2insn_patch; + while (p2 < &__sun4v_2insn_patch_end) { unsigned long addr = p2->addr; *(unsigned int *) (addr + 0) = p2->insns[0]; @@ -606,7 +606,7 @@ void __init setup_arch(char **cmdline_p) */ per_cpu_patch(); - gl_patch(); + sun4v_patch(); boot_flags_init(*cmdline_p); diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index 818bc9e9135a..819a6ef9799f 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -74,7 +74,7 @@ tsb_dtlb_load: 661: stxa %g5, [%g0] ASI_DTLB_DATA_IN retry - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop @@ -99,7 +99,7 @@ tsb_itlb_load: 661: stxa %g5, [%g0] ASI_ITLB_DATA_IN retry - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop @@ -130,7 +130,7 @@ tsb_do_fault: 661: rdpr %pstate, %g5 wrpr %g5, PSTATE_AG | PSTATE_MG, %pstate - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop @@ -145,7 +145,7 @@ tsb_do_dtlb_fault: 661: mov TLB_TAG_ACCESS, %g4 ldxa [%g4] ASI_DMMU, %g5 - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b mov %g4, %g5 nop @@ -250,7 +250,7 @@ __tsb_context_switch: 661: mov TSB_REG, %g1 stxa %o1, [%g1] ASI_DMMU - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b mov SCRATCHPAD_UTSBREG1, %g1 stxa %o1, [%g1] ASI_SCRATCHPAD @@ -260,7 +260,7 @@ __tsb_context_switch: 661: stxa %o1, [%g1] ASI_IMMU membar #Sync - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index a09a8a2383dd..b097379a49a8 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -80,12 +80,12 @@ SECTIONS __cpuid_patch = .; .cpuid_patch : { *(.cpuid_patch) } __cpuid_patch_end = .; - __gl_1insn_patch = .; - .gl_1insn_patch : { *(.gl_1insn_patch) } - __gl_1insn_patch_end = .; - __gl_2insn_patch = .; - .gl_2insn_patch : { *(.gl_2insn_patch) } - __gl_2insn_patch_end = .; + __sun4v_1insn_patch = .; + .sun4v_1insn_patch : { *(.sun4v_1insn_patch) } + __sun4v_1insn_patch_end = .; + __sun4v_2insn_patch = .; + .sun4v_2insn_patch : { *(.sun4v_2insn_patch) } + __sun4v_2insn_patch_end = .; . = ALIGN(8192); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 5dd86ad0d29f..8c244932b1c2 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -447,7 +447,7 @@ xcall_sync_tick: 661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop @@ -478,7 +478,7 @@ xcall_report_regs: 661: rdpr %pstate, %g2 wrpr %g2, PSTATE_IG | PSTATE_AG, %pstate - .section .gl_2insn_patch, "ax" + .section .sun4v_2insn_patch, "ax" .word 661b nop nop diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index a3dc4afc4b21..26b1dc9afbf1 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -78,17 +78,20 @@ struct cpuid_patch_entry { extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; #endif -struct gl_1insn_patch_entry { +struct sun4v_1insn_patch_entry { unsigned int addr; unsigned int insn; }; -extern struct gl_1insn_patch_entry __gl_1insn_patch, __gl_1insn_patch_end; +extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch, + __sun4v_1insn_patch_end; -struct gl_2insn_patch_entry { +struct sun4v_2insn_patch_entry { unsigned int addr; unsigned int insns[2]; }; -extern struct gl_2insn_patch_entry __gl_2insn_patch, __gl_2insn_patch_end; +extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, + __sun4v_2insn_patch_end; + #endif /* !(__ASSEMBLY__) */ #define TRAP_PER_CPU_THREAD 0x00 -- cgit v1.2.3-59-g8ed1b From 89a5264f065672a882f555228000614a6b2182b7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 7 Feb 2006 21:15:41 -0800 Subject: [SPARC64]: asm/cpudata.h needs asm/asi.h For the expansion of __GET_CPUID() on SMP. Signed-off-by: David S. Miller --- include/asm-sparc64/cpudata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 26b1dc9afbf1..8a171ad77274 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -7,6 +7,7 @@ #define _SPARC64_CPUDATA_H #include +#include #ifndef __ASSEMBLY__ @@ -130,9 +131,9 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, lduwa [REG] ASI_PHYS_BYPASS_EC_E, REG;\ /* sun4v implementation. */ \ mov SCRATCHPAD_CPUID, REG; \ - nop; \ ldxa [REG] ASI_SCRATCHPAD, REG; \ nop; \ + nop; \ .previous; /* Clobbers TMP, current address space PGD phys address into DEST. */ -- cgit v1.2.3-59-g8ed1b From 7202c55c5c57d2ad4611a751544c9368d7fba93a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 7 Feb 2006 22:53:56 -0800 Subject: [SPARC64]: Add sun4v mondo queue bases to struct trap_per_cpu. Also, correct TRAP_PER_CPU_FAULT_INFO define, it should be 0x40 not 0x20. Signed-off-by: David S. Miller --- include/asm-sparc64/cpudata.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 8a171ad77274..492314b53475 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -53,15 +53,18 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); */ struct thread_info; struct trap_per_cpu { -/* D-cache line 1 */ +/* D-cache line 1: Basic thread information */ struct thread_info *thread; unsigned long pgd_paddr; unsigned long __pad1[2]; -/* D-cache line 2 */ - unsigned long __pad2[4]; +/* D-cache line 2: Sun4V Mondo Queue pointers */ + unsigned long cpu_mondo_pa; + unsigned long dev_mondo_pa; + unsigned long resum_mondo_pa; + unsigned long nonresum_mondo_pa; -/* Dcache lines 3 and 4 */ +/* Dcache lines 3 and 4: Hypervisor Fault Status */ struct hv_fault_status fault_info; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; @@ -95,11 +98,15 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #endif /* !(__ASSEMBLY__) */ -#define TRAP_PER_CPU_THREAD 0x00 -#define TRAP_PER_CPU_PGD_PADDR 0x08 -#define TRAP_PER_CPU_FAULT_INFO 0x20 +#define TRAP_PER_CPU_THREAD 0x00 +#define TRAP_PER_CPU_PGD_PADDR 0x08 +#define TRAP_PER_CPU_CPU_MONDO_PA 0x20 +#define TRAP_PER_CPU_DEV_MONDO_PA 0x28 +#define TRAP_PER_CPU_RESUM_MONDO_PA 0x30 +#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x38 +#define TRAP_PER_CPU_FAULT_INFO 0x40 -#define TRAP_BLOCK_SZ_SHIFT 7 +#define TRAP_BLOCK_SZ_SHIFT 7 #include -- cgit v1.2.3-59-g8ed1b From 5b0c0572fcd6204675c5f7ddfa572b5017f817dd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Feb 2006 02:53:50 -0800 Subject: [SPARC64]: Sun4v interrupt handling. Sun4v has 4 interrupt queues: cpu, device, resumable errors, and non-resumable errors. A set of head/tail offset pointers help maintain a work queue in physical memory. The entries are 64-bytes in size. Each queue is allocated then registered with the hypervisor as we bring cpus up. The two error queues each get a kernel side buffer that we use to quickly empty the main interrupt queue before we call up to C code to log the event and possibly take evasive action. Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 3 +- arch/sparc64/kernel/irq.c | 16 +- arch/sparc64/kernel/sun4v_ivec.S | 349 +++++++++++++++++++++++++++++++++++++++ arch/sparc64/kernel/traps.c | 184 +++++++++++++++++++++ arch/sparc64/kernel/ttable.S | 5 +- include/asm-sparc64/cpudata.h | 22 ++- 6 files changed, 568 insertions(+), 11 deletions(-) create mode 100644 arch/sparc64/kernel/sun4v_ivec.S (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index a304845f8c56..01980014aead 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -511,13 +511,14 @@ setup_tba: sparc64_boot_end: #include "systbls.S" -#include "sun4v_tlb_miss.S" #include "ktlb.S" #include "tsb.S" #include "etrap.S" #include "rtrap.S" #include "winfixup.S" #include "entry.S" +#include "sun4v_tlb_miss.S" +#include "sun4v_ivec.S" /* * The following skip makes sure the trap table in ttable.S is aligned diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 3c1a2139f1b9..ff201c007e0c 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -888,7 +888,19 @@ static void __cpuinit init_one_mondo(unsigned long *pa_ptr, unsigned long type) } } -/* Allocate and init the mondo queues for this cpu. */ +static void __cpuinit init_one_kbuf(unsigned long *pa_ptr) +{ + unsigned long page = get_zeroed_page(GFP_ATOMIC); + + if (!page) { + prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); + prom_halt(); + } + + *pa_ptr = __pa(page); +} + +/* Allocate and init the mondo and error queues for this cpu. */ void __cpuinit sun4v_init_mondo_queues(void) { int cpu = hard_smp_processor_id(); @@ -897,7 +909,9 @@ void __cpuinit sun4v_init_mondo_queues(void) init_one_mondo(&tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO); init_one_mondo(&tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO); init_one_mondo(&tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR); + init_one_kbuf(&tb->resum_kernel_buf_pa); init_one_mondo(&tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR); + init_one_kbuf(&tb->nonresum_kernel_buf_pa); } /* Only invoked on boot processor. */ diff --git a/arch/sparc64/kernel/sun4v_ivec.S b/arch/sparc64/kernel/sun4v_ivec.S new file mode 100644 index 000000000000..d9d442017d3d --- /dev/null +++ b/arch/sparc64/kernel/sun4v_ivec.S @@ -0,0 +1,349 @@ +/* sun4v_ivec.S: Sun4v interrupt vector handling. + * + * Copyright (C) 2006 + */ + +#include +#include + + .text + .align 32 + +sun4v_cpu_mondo: + /* Head offset in %g2, tail offset in %g4. + * If they are the same, no work. + */ + mov INTRQ_CPU_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_CPU_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_cpu_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + __GET_CPUID(%g1) + sethi %hi(trap_block), %g3 + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 + or %g3, %lo(trap_block), %g3 + add %g3, %g7, %g3 + + /* Get CPU mondo queue base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 + + /* Now get the cross-call arguments and handler PC, same + * layout as sun4u: + * + * 1st 64-bit word: low half is 32-bit PC, put into %g3 and jmpl to it + * high half is context arg to MMU flushes, into %g5 + * 2nd 64-bit word: 64-bit arg, load into %g1 + * 3rd 64-bit word: 64-bit arg, load into %g7 + */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x8, %g2 + srlx %g3, 32, %g5 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + add %g2, 0x8, %g2 + srl %g3, 0, %g3 + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g7 + add %g2, 0x40 - 0x8 - 0x8, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_CPU_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + jmpl %g3, %g0 + nop + +sun4v_cpu_mondo_queue_empty: + retry + +sun4v_dev_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_DEVICE_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_DEVICE_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_dev_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + __GET_CPUID(%g1) + sethi %hi(trap_block), %g3 + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 + or %g3, %lo(trap_block), %g3 + add %g3, %g7, %g3 + + /* Get DEV mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_DEV_MONDO_PA], %g5 + + /* Load IVEC into %g3. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + add %g2, 0x40, %g2 + + /* XXX There can be a full 64-byte block of data here. + * XXX This is how we can get at MSI vector data. + * XXX Current we do not capture this, but when we do we'll + * XXX need to add a 64-byte storage area in the struct ino_bucket + * XXX or the struct irq_desc. + */ + + /* Update queue head pointer, this frees up some registers. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_DEVICE_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Get &__irq_work[smp_processor_id()] into %g1. */ + sethi %hi(__irq_work), %g4 + sllx %g1, 6, %g1 + or %g4, %lo(__irq_work), %g4 + add %g4, %g1, %g1 + + /* Get &ivector_table[IVEC] into %g4. */ + sethi %hi(ivector_table), %g4 + sllx %g3, 5, %g3 + or %g4, %lo(ivector_table), %g4 + add %g4, %g3, %g4 + + /* Load IRQ %pil into %g5. */ + ldub [%g4 + 0x04], %g5 + + /* Insert ivector_table[] entry into __irq_work[] queue. */ + sllx %g5, 2, %g3 + lduw [%g1 + %g3], %g2 /* g2 = irq_work(cpu, pil) */ + stw %g2, [%g4 + 0x00] /* bucket->irq_chain = g2 */ + stw %g4, [%g1 + %g3] /* irq_work(cpu, pil) = bucket */ + + /* Signal the interrupt by setting (1 << pil) in %softint. */ + mov 1, %g2 + sllx %g2, %g5, %g2 + wr %g2, 0x0, %set_softint + +sun4v_dev_mondo_queue_empty: + retry + +sun4v_res_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_RESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_res_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + __GET_CPUID(%g1) + sethi %hi(trap_block), %g3 + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 + or %g3, %lo(trap_block), %g3 + add %g3, %g7, %g3 + + /* Get RES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5 + + /* Get RES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_RESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_res_mondo_queue_full + nop + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_RESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. + */ + rdpr %pil, %g2 + wrpr %g0, 15, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_resum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_res_mondo_queue_empty: + retry + +sun4v_res_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_RESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + call sun4v_resum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo: + /* Head offset in %g2, tail offset in %g4. */ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + ldxa [%g2] ASI_QUEUE, %g2 + mov INTRQ_NONRESUM_MONDO_TAIL, %g4 + ldxa [%g4] ASI_QUEUE, %g4 + cmp %g2, %g4 + be,pn %xcc, sun4v_nonres_mondo_queue_empty + nop + + /* Get &trap_block[smp_processor_id()] into %g3. */ + __GET_CPUID(%g1) + sethi %hi(trap_block), %g3 + sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 + or %g3, %lo(trap_block), %g3 + add %g3, %g7, %g3 + + /* Get RES mondo queue base phys address into %g5. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5 + + /* Get RES kernel buffer base phys address into %g7. */ + ldx [%g3 + TRAP_PER_CPU_NONRESUM_KBUF_PA], %g7 + + /* If the first word is non-zero, queue is full. */ + ldxa [%g7 + %g2] ASI_PHYS_USE_EC, %g1 + brnz,pn %g1, sun4v_nonres_mondo_queue_full + nop + + /* Remember this entry's offset in %g1. */ + mov %g2, %g1 + + /* Copy 64-byte queue entry into kernel buffer. */ + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + ldxa [%g5 + %g2] ASI_PHYS_USE_EC, %g3 + stxa %g3, [%g7 + %g2] ASI_PHYS_USE_EC + add %g2, 0x08, %g2 + + /* Update queue head pointer. */ + sethi %hi(8192 - 1), %g4 + or %g4, %lo(8192 - 1), %g4 + and %g2, %g4, %g2 + + mov INTRQ_NONRESUM_MONDO_HEAD, %g4 + stxa %g2, [%g4] ASI_QUEUE + membar #Sync + + /* Disable interrupts and save register state so we can call + * C code. The etrap handling will leave %g4 in %l4 for us + * when it's done. + */ + rdpr %pil, %g2 + wrpr %g0, 15, %pil + mov %g1, %g4 + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + /* Log the event. */ + add %sp, PTREGS_OFF, %o0 + call sun4v_nonresum_error + mov %l4, %o1 + + /* Return from trap. */ + ba,pt %xcc, rtrap_irq + nop + +sun4v_nonres_mondo_queue_empty: + retry + +sun4v_nonres_mondo_queue_full: + /* The queue is full, consolidate our damage by setting + * the head equal to the tail. We'll just trap again otherwise. + * Call C code to log the event. + */ + mov INTRQ_NONRESUM_MONDO_HEAD, %g2 + stxa %g4, [%g2] ASI_QUEUE + membar #Sync + + rdpr %pil, %g2 + wrpr %g0, 15, %pil + ba,pt %xcc, etrap_irq + rd %pc, %g7 + + call sun4v_nonresum_overflow + add %sp, PTREGS_OFF, %o0 + + ba,pt %xcc, rtrap_irq + nop diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 8f3fce24359d..5417ff1b9345 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -1668,6 +1668,186 @@ void cheetah_plus_parity_error(int type, struct pt_regs *regs) regs->tpc); } +struct sun4v_error_entry { + u64 err_handle; + u64 err_stick; + + u32 err_type; +#define SUN4V_ERR_TYPE_UNDEFINED 0 +#define SUN4V_ERR_TYPE_UNCORRECTED_RES 1 +#define SUN4V_ERR_TYPE_PRECISE_NONRES 2 +#define SUN4V_ERR_TYPE_DEFERRED_NONRES 3 +#define SUN4V_ERR_TYPE_WARNING_RES 4 + + u32 err_attrs; +#define SUN4V_ERR_ATTRS_PROCESSOR 0x00000001 +#define SUN4V_ERR_ATTRS_MEMORY 0x00000002 +#define SUN4V_ERR_ATTRS_PIO 0x00000004 +#define SUN4V_ERR_ATTRS_INT_REGISTERS 0x00000008 +#define SUN4V_ERR_ATTRS_FPU_REGISTERS 0x00000010 +#define SUN4V_ERR_ATTRS_USER_MODE 0x01000000 +#define SUN4V_ERR_ATTRS_PRIV_MODE 0x02000000 +#define SUN4V_ERR_ATTRS_RES_QUEUE_FULL 0x80000000 + + u64 err_raddr; + u32 err_size; + u16 err_cpu; + u16 err_pad; +}; + +static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0); +static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0); + +static const char *sun4v_err_type_to_str(u32 type) +{ + switch (type) { + case SUN4V_ERR_TYPE_UNDEFINED: + return "undefined"; + case SUN4V_ERR_TYPE_UNCORRECTED_RES: + return "uncorrected resumable"; + case SUN4V_ERR_TYPE_PRECISE_NONRES: + return "precise nonresumable"; + case SUN4V_ERR_TYPE_DEFERRED_NONRES: + return "deferred nonresumable"; + case SUN4V_ERR_TYPE_WARNING_RES: + return "warning resumable"; + default: + return "unknown"; + }; +} + +static void sun4v_log_error(struct sun4v_error_entry *ent, int cpu, const char *pfx, atomic_t *ocnt) +{ + int cnt; + + printk("%s: Reporting on cpu %d\n", pfx, cpu); + printk("%s: err_handle[%lx] err_stick[%lx] err_type[%08x:%s]\n", + pfx, + ent->err_handle, ent->err_stick, + ent->err_type, + sun4v_err_type_to_str(ent->err_type)); + printk("%s: err_attrs[%08x:%s %s %s %s %s %s %s %s]\n", + pfx, + ent->err_attrs, + ((ent->err_attrs & SUN4V_ERR_ATTRS_PROCESSOR) ? + "processor" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_MEMORY) ? + "memory" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PIO) ? + "pio" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_INT_REGISTERS) ? + "integer-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_FPU_REGISTERS) ? + "fpu-regs" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_USER_MODE) ? + "user" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_PRIV_MODE) ? + "privileged" : ""), + ((ent->err_attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL) ? + "queue-full" : "")); + printk("%s: err_raddr[%016lx] err_size[%u] err_cpu[%u]\n", + pfx, + ent->err_raddr, ent->err_size, ent->err_cpu); + + if ((cnt = atomic_read(ocnt)) != 0) { + atomic_set(ocnt, 0); + wmb(); + printk("%s: Queue overflowed %d times.\n", + pfx, cnt); + } +} + +/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate. + * Log the event and clear the first word of the entry. + */ +void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->resum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + + sun4v_log_error(&local_copy, cpu, + KERN_ERR "RESUMABLE ERROR", + &sun4v_resum_oflow_cnt); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_resum_overflow(struct pt_regs *regs) +{ + atomic_inc(&sun4v_resum_oflow_cnt); +} + +/* We run with %pil set to 15 and PSTATE_IE enabled in %pstate. + * Log the event, clear the first word of the entry, and die. + */ +void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset) +{ + struct sun4v_error_entry *ent, local_copy; + struct trap_per_cpu *tb; + unsigned long paddr; + int cpu; + + cpu = get_cpu(); + + tb = &trap_block[cpu]; + paddr = tb->nonresum_kernel_buf_pa + offset; + ent = __va(paddr); + + memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry)); + + /* We have a local copy now, so release the entry. */ + ent->err_handle = 0; + wmb(); + + put_cpu(); + +#ifdef CONFIG_PCI + /* Check for the special PCI poke sequence. */ + if (pci_poke_in_progress && pci_poke_cpu == cpu) { + pci_poke_faulted = 1; + regs->tpc += 4; + regs->tnpc = regs->tpc + 4; + return; + } +#endif + + sun4v_log_error(&local_copy, cpu, + KERN_EMERG "NON-RESUMABLE ERROR", + &sun4v_nonresum_oflow_cnt); + + panic("Non-resumable error."); +} + +/* If we try to printk() we'll probably make matters worse, by trying + * to retake locks this cpu already holds or causing more errors. So + * just bump a counter, and we'll report these counter bumps above. + */ +void sun4v_nonresum_overflow(struct pt_regs *regs) +{ + /* XXX Actually even this can make not that much sense. Perhaps + * XXX we should just pull the plug and panic directly from here? + */ + atomic_inc(&sun4v_nonresum_oflow_cnt); +} + void do_fpe_common(struct pt_regs *regs) { if (regs->tstate & TSTATE_PRIV) { @@ -2190,8 +2370,12 @@ void __init trap_init(void) offsetof(struct trap_per_cpu, dev_mondo_pa)) || (TRAP_PER_CPU_RESUM_MONDO_PA != offsetof(struct trap_per_cpu, resum_mondo_pa)) || + (TRAP_PER_CPU_RESUM_KBUF_PA != + offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || (TRAP_PER_CPU_NONRESUM_MONDO_PA != offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || + (TRAP_PER_CPU_NONRESUM_KBUF_PA != + offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || (TRAP_PER_CPU_FAULT_INFO != offsetof(struct trap_per_cpu, fault_info))) trap_per_cpu_offsets_are_bolixed_dave(); diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 2679b6e253ae..1608ba4bf1c1 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -88,7 +88,10 @@ tl0_dcpe: BTRAP(0x71) /* D-cache Parity Error on Cheetah+ */ tl0_icpe: BTRAP(0x72) /* I-cache Parity Error on Cheetah+ */ tl0_resv073: BTRAP(0x73) BTRAP(0x74) BTRAP(0x75) tl0_resv076: BTRAP(0x76) BTRAP(0x77) BTRAP(0x78) BTRAP(0x79) BTRAP(0x7a) BTRAP(0x7b) -tl0_resv07c: BTRAP(0x7c) BTRAP(0x7d) BTRAP(0x7e) BTRAP(0x7f) +tl0_cpu_mondo: TRAP_NOSAVE(sun4v_cpu_mondo) +tl0_dev_mondo: TRAP_NOSAVE(sun4v_dev_mondo) +tl0_res_mondo: TRAP_NOSAVE(sun4v_res_mondo) +tl0_nres_mondo: TRAP_NOSAVE(sun4v_nonres_mondo) tl0_s0n: SPILL_0_NORMAL tl0_s1n: SPILL_1_NORMAL tl0_s2n: SPILL_2_NORMAL diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 492314b53475..7f0a74ec47f6 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -53,16 +53,17 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); */ struct thread_info; struct trap_per_cpu { -/* D-cache line 1: Basic thread information */ +/* D-cache line 1: Basic thread information, cpu and device mondo queues */ struct thread_info *thread; unsigned long pgd_paddr; - unsigned long __pad1[2]; - -/* D-cache line 2: Sun4V Mondo Queue pointers */ unsigned long cpu_mondo_pa; unsigned long dev_mondo_pa; + +/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */ unsigned long resum_mondo_pa; + unsigned long resum_kernel_buf_pa; unsigned long nonresum_mondo_pa; + unsigned long nonresum_kernel_buf_pa; /* Dcache lines 3 and 4: Hypervisor Fault Status */ struct hv_fault_status fault_info; @@ -100,10 +101,12 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #define TRAP_PER_CPU_THREAD 0x00 #define TRAP_PER_CPU_PGD_PADDR 0x08 -#define TRAP_PER_CPU_CPU_MONDO_PA 0x20 -#define TRAP_PER_CPU_DEV_MONDO_PA 0x28 -#define TRAP_PER_CPU_RESUM_MONDO_PA 0x30 -#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x38 +#define TRAP_PER_CPU_CPU_MONDO_PA 0x10 +#define TRAP_PER_CPU_DEV_MONDO_PA 0x18 +#define TRAP_PER_CPU_RESUM_MONDO_PA 0x20 +#define TRAP_PER_CPU_RESUM_KBUF_PA 0x28 +#define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30 +#define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38 #define TRAP_PER_CPU_FAULT_INFO 0x40 #define TRAP_BLOCK_SZ_SHIFT 7 @@ -188,6 +191,9 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #else +#define __GET_CPUID(REG) \ + mov 0, REG; + /* Uniprocessor versions, we know the cpuid is zero. */ #define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ sethi %hi(trap_block), DEST; \ -- cgit v1.2.3-59-g8ed1b From 1d2f1f90a1e004b0c1b8a73ed4394a93f09104b3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Feb 2006 16:41:20 -0800 Subject: [SPARC64]: Sun4v cross-call sending support. Technically the hypervisor call supports sending in a list of all cpus to get the cross-call, but I only pass in one cpu at a time for now. The multi-cpu support is there, just ifdef'd out so it's easy to enable or delete it later. Signed-off-by: David S. Miller --- arch/sparc64/kernel/irq.c | 22 ++++++++ arch/sparc64/kernel/smp.c | 125 +++++++++++++++++++++++++++++++++++++++++- arch/sparc64/kernel/traps.c | 6 +- include/asm-sparc64/cpudata.h | 14 ++++- 4 files changed, 163 insertions(+), 4 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index ff201c007e0c..c80d2531ec46 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -900,6 +900,24 @@ static void __cpuinit init_one_kbuf(unsigned long *pa_ptr) *pa_ptr = __pa(page); } +static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb) +{ +#ifdef CONFIG_SMP + unsigned long page; + + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + + page = get_zeroed_page(GFP_ATOMIC); + if (!page) { + prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_halt(); + } + + tb->cpu_mondo_block_pa = __pa(page); + tb->cpu_list_pa = __pa(page + 64); +#endif +} + /* Allocate and init the mondo and error queues for this cpu. */ void __cpuinit sun4v_init_mondo_queues(void) { @@ -908,10 +926,14 @@ void __cpuinit sun4v_init_mondo_queues(void) init_one_mondo(&tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO); init_one_mondo(&tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO); + init_one_mondo(&tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR); init_one_kbuf(&tb->resum_kernel_buf_pa); + init_one_mondo(&tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR); init_one_kbuf(&tb->nonresum_kernel_buf_pa); + + init_cpu_send_mondo_info(tb); } /* Only invoked on boot processor. */ diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 223cc6bd369a..c10a3a8639e8 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -531,10 +531,133 @@ retry: } } +#if 0 +/* Multi-cpu list version. */ +static int init_cpu_list(u16 *list, cpumask_t mask) +{ + int i, cnt; + + cnt = 0; + for_each_cpu_mask(i, mask) + list[cnt++] = i; + + return cnt; +} + +static int update_cpu_list(u16 *list, int orig_cnt, cpumask_t mask) +{ + int i; + + for (i = 0; i < orig_cnt; i++) { + if (list[i] == 0xffff) + cpu_clear(i, mask); + } + + return init_cpu_list(list, mask); +} + +static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) +{ + int this_cpu = get_cpu(); + struct trap_per_cpu *tb = &trap_block[this_cpu]; + u64 *mondo = __va(tb->cpu_mondo_block_pa); + u16 *cpu_list = __va(tb->cpu_list_pa); + int cnt, retries; + + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + retries = 0; + cnt = init_cpu_list(cpu_list, mask); + do { + register unsigned long func __asm__("%o0"); + register unsigned long arg0 __asm__("%o1"); + register unsigned long arg1 __asm__("%o2"); + register unsigned long arg2 __asm__("%o3"); + + func = HV_FAST_CPU_MONDO_SEND; + arg0 = cnt; + arg1 = tb->cpu_list_pa; + arg2 = tb->cpu_mondo_block_pa; + + __asm__ __volatile__("ta %8" + : "=&r" (func), "=&r" (arg0), + "=&r" (arg1), "=&r" (arg2) + : "0" (func), "1" (arg0), + "2" (arg1), "3" (arg2), + "i" (HV_FAST_TRAP) + : "memory"); + if (likely(func == HV_EOK)) + break; + + if (unlikely(++retries > 100)) { + printk("CPU[%d]: sun4v mondo error %lu\n", + this_cpu, func); + break; + } + + cnt = update_cpu_list(cpu_list, cnt, mask); + + udelay(2 * cnt); + } while (1); + + put_cpu(); +} +#else +/* Single-cpu list version. */ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { - /* XXX implement me */ + int this_cpu = get_cpu(); + struct trap_per_cpu *tb = &trap_block[this_cpu]; + u64 *mondo = __va(tb->cpu_mondo_block_pa); + u16 *cpu_list = __va(tb->cpu_list_pa); + int i; + + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + for_each_cpu_mask(i, mask) { + int retries = 0; + + do { + register unsigned long func __asm__("%o0"); + register unsigned long arg0 __asm__("%o1"); + register unsigned long arg1 __asm__("%o2"); + register unsigned long arg2 __asm__("%o3"); + + cpu_list[0] = i; + func = HV_FAST_CPU_MONDO_SEND; + arg0 = 1; + arg1 = tb->cpu_list_pa; + arg2 = tb->cpu_mondo_block_pa; + + __asm__ __volatile__("ta %8" + : "=&r" (func), "=&r" (arg0), + "=&r" (arg1), "=&r" (arg2) + : "0" (func), "1" (arg0), + "2" (arg1), "3" (arg2), + "i" (HV_FAST_TRAP) + : "memory"); + if (likely(func == HV_EOK)) + break; + + if (unlikely(++retries > 100)) { + printk("CPU[%d]: sun4v mondo error %lu\n", + this_cpu, func); + break; + } + + udelay(2 * i); + } while (1); + } + + put_cpu(); } +#endif /* Send cross call to all processors mentioned in MASK * except self. diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 5417ff1b9345..ac171161e794 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -2377,7 +2377,11 @@ void __init trap_init(void) (TRAP_PER_CPU_NONRESUM_KBUF_PA != offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || (TRAP_PER_CPU_FAULT_INFO != - offsetof(struct trap_per_cpu, fault_info))) + offsetof(struct trap_per_cpu, fault_info)) || + (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != + offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || + (TRAP_PER_CPU_CPU_LIST_PA != + offsetof(struct trap_per_cpu, cpu_list_pa))) trap_per_cpu_offsets_are_bolixed_dave(); /* Attach to the address space of init_task. On SMP we diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 7f0a74ec47f6..338b0ca5b519 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -65,8 +65,16 @@ struct trap_per_cpu { unsigned long nonresum_mondo_pa; unsigned long nonresum_kernel_buf_pa; -/* Dcache lines 3 and 4: Hypervisor Fault Status */ +/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */ struct hv_fault_status fault_info; + +/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list. */ + unsigned long cpu_mondo_block_pa; + unsigned long cpu_list_pa; + unsigned long __pad1[2]; + +/* Dcache line 8: Unused, needed to keep trap_block a power-of-2 in size. */ + unsigned long __pad2[4]; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(void); @@ -108,8 +116,10 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #define TRAP_PER_CPU_NONRESUM_MONDO_PA 0x30 #define TRAP_PER_CPU_NONRESUM_KBUF_PA 0x38 #define TRAP_PER_CPU_FAULT_INFO 0x40 +#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA 0xc0 +#define TRAP_PER_CPU_CPU_LIST_PA 0xc8 -#define TRAP_BLOCK_SZ_SHIFT 7 +#define TRAP_BLOCK_SZ_SHIFT 8 #include -- cgit v1.2.3-59-g8ed1b From 12eaa328f9fb2d3fcb5afb682c762690d05a3cd8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 10 Feb 2006 15:39:51 -0800 Subject: [SPARC64]: Use ASI_SCRATCHPAD address 0x0 properly. This is where the virtual address of the fault status area belongs. To set it up we don't make a hypervisor call, instead we call OBP's SUNW,set-trap-table with the real address of the fault status area as the second argument. And right before that call we write the virtual address into ASI_SCRATCHPAD vaddr 0x0. Signed-off-by: David S. Miller --- arch/sparc64/kernel/head.S | 29 ++++++- arch/sparc64/kernel/sun4v_ivec.S | 28 ++---- arch/sparc64/kernel/sun4v_tlb_miss.S | 162 ++++++++++------------------------- arch/sparc64/kernel/trampoline.S | 29 ++++++- arch/sparc64/mm/init.c | 22 +---- arch/sparc64/prom/misc.c | 5 ++ include/asm-sparc64/cpudata.h | 26 +++--- include/asm-sparc64/oplib.h | 1 + include/asm-sparc64/ttable.h | 34 ++++---- 9 files changed, 144 insertions(+), 192 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index d048f0dfd423..f581f0e917f7 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -521,11 +521,36 @@ setup_trap_table: wrpr %g0, 15, %pil /* Make the firmware call to jump over to the Linux trap table. */ - call prom_set_trap_table + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 1f + nop + + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + /* Compute physical address: + * + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) + */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 + + call prom_set_trap_table_sun4v + sethi %hi(sparc64_ttable_tl0), %o0 + + ba,pt %xcc, 2f + nop + +1: call prom_set_trap_table sethi %hi(sparc64_ttable_tl0), %o0 /* Start using proper page size encodings in ctx register. */ - sethi %hi(sparc64_kern_pri_context), %g3 +2: sethi %hi(sparc64_kern_pri_context), %g3 ldx [%g3 + %lo(sparc64_kern_pri_context)], %g2 mov PRIMARY_CONTEXT, %g1 diff --git a/arch/sparc64/kernel/sun4v_ivec.S b/arch/sparc64/kernel/sun4v_ivec.S index d9d442017d3d..c0367ef7e098 100644 --- a/arch/sparc64/kernel/sun4v_ivec.S +++ b/arch/sparc64/kernel/sun4v_ivec.S @@ -22,11 +22,8 @@ sun4v_cpu_mondo: nop /* Get &trap_block[smp_processor_id()] into %g3. */ - __GET_CPUID(%g1) - sethi %hi(trap_block), %g3 - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 - or %g3, %lo(trap_block), %g3 - add %g3, %g7, %g3 + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 /* Get CPU mondo queue base phys address into %g7. */ ldx [%g3 + TRAP_PER_CPU_CPU_MONDO_PA], %g7 @@ -74,11 +71,8 @@ sun4v_dev_mondo: nop /* Get &trap_block[smp_processor_id()] into %g3. */ - __GET_CPUID(%g1) - sethi %hi(trap_block), %g3 - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 - or %g3, %lo(trap_block), %g3 - add %g3, %g7, %g3 + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 /* Get DEV mondo queue base phys address into %g5. */ ldx [%g3 + TRAP_PER_CPU_DEV_MONDO_PA], %g5 @@ -143,11 +137,8 @@ sun4v_res_mondo: nop /* Get &trap_block[smp_processor_id()] into %g3. */ - __GET_CPUID(%g1) - sethi %hi(trap_block), %g3 - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 - or %g3, %lo(trap_block), %g3 - add %g3, %g7, %g3 + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 /* Get RES mondo queue base phys address into %g5. */ ldx [%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5 @@ -251,11 +242,8 @@ sun4v_nonres_mondo: nop /* Get &trap_block[smp_processor_id()] into %g3. */ - __GET_CPUID(%g1) - sethi %hi(trap_block), %g3 - sllx %g1, TRAP_BLOCK_SZ_SHIFT, %g7 - or %g3, %lo(trap_block), %g3 - add %g3, %g7, %g3 + ldxa [%g0] ASI_SCRATCHPAD, %g3 + sub %g3, TRAP_PER_CPU_FAULT_INFO, %g3 /* Get RES mondo queue base phys address into %g5. */ ldx [%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5 diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S index c408b05a5f0a..f6222623de38 100644 --- a/arch/sparc64/kernel/sun4v_tlb_miss.S +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -7,26 +7,20 @@ .align 32 sun4v_itlb_miss: - /* Load CPU ID into %g3. */ - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g3 /* Load UTSB reg into %g1. */ - ldxa [%g1 + %g1] ASI_SCRATCHPAD, %g1 - - /* Load &trap_block[smp_processor_id()] into %g2. */ - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 + mov SCRATCHPAD_UTSBREG1, %g1 + ldxa [%g1] ASI_SCRATCHPAD, %g1 /* Create a TAG TARGET, "(vaddr>>22) | (ctx << 48)", in %g6. * Branch if kernel TLB miss. The kernel TSB and user TSB miss * code wants the missing virtual address in %g4, so that value * cannot be modified through the entirety of this handler. */ - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 srlx %g4, 22, %g3 sllx %g5, 48, %g6 or %g6, %g3, %g6 @@ -90,26 +84,20 @@ sun4v_itlb_load: retry sun4v_dtlb_miss: - /* Load CPU ID into %g3. */ - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 /* Load UTSB reg into %g1. */ + mov SCRATCHPAD_UTSBREG1, %g1 ldxa [%g1 + %g1] ASI_SCRATCHPAD, %g1 - /* Load &trap_block[smp_processor_id()] into %g2. */ - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - /* Create a TAG TARGET, "(vaddr>>22) | (ctx << 48)", in %g6. * Branch if kernel TLB miss. The kernel TSB and user TSB miss * code wants the missing virtual address in %g4, so that value * cannot be modified through the entirety of this handler. */ - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 srlx %g4, 22, %g3 sllx %g5, 48, %g6 or %g6, %g3, %g6 @@ -169,17 +157,10 @@ sun4v_dtlb_load: retry sun4v_dtlb_prot: - /* Load CPU ID into %g3. */ - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 + /* Load MMU Miss base into %g2. */ + ldxa [%g0] ASI_SCRATCHPAD, %g2 - /* Load &trap_block[smp_processor_id()] into %g2. */ - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g5 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g5 rdpr %tl, %g1 cmp %g1, 1 bgu,pn %xcc, winfix_trampoline @@ -187,35 +168,17 @@ sun4v_dtlb_prot: ba,pt %xcc, sparc64_realfault_common mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4 - /* Called from trap table with &trap_block[smp_processor_id()] in - * %g5 and SCRATCHPAD_UTSBREG1 contents in %g1. + /* Called from trap table with TAG TARGET placed into + * %g6 and SCRATCHPAD_UTSBREG1 contents in %g1. */ sun4v_itsb_miss: - ldx [%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4 - ldx [%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5 - - srlx %g4, 22, %g7 - sllx %g5, 48, %g6 - or %g6, %g7, %g6 - brz,pn %g5, kvmap_itlb_4v - nop - ba,pt %xcc, sun4v_tsb_miss_common mov FAULT_CODE_ITLB, %g3 - /* Called from trap table with &trap_block[smp_processor_id()] in - * %g5 and SCRATCHPAD_UTSBREG1 contents in %g1. + /* Called from trap table with TAG TARGET placed into + * %g6 and SCRATCHPAD_UTSBREG1 contents in %g1. */ sun4v_dtsb_miss: - ldx [%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 - ldx [%g5 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 - - srlx %g4, 22, %g7 - sllx %g5, 48, %g6 - or %g6, %g7, %g6 - brz,pn %g5, kvmap_dtlb_4v - nop - mov FAULT_CODE_DTLB, %g3 /* Create TSB pointer into %g1. This is something like: @@ -239,15 +202,10 @@ sun4v_tsb_miss_common: /* Instruction Access Exception, tl0. */ sun4v_iacc: - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_TYPE_OFFSET], %g3 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5 + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 sllx %g3, 16, %g3 or %g5, %g3, %g5 ba,pt %xcc, etrap @@ -260,15 +218,10 @@ sun4v_iacc: /* Instruction Access Exception, tl1. */ sun4v_iacc_tl1: - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_TYPE_OFFSET], %g3 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_I_CTX_OFFSET], %g5 + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_I_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5 sllx %g3, 16, %g3 or %g5, %g3, %g5 ba,pt %xcc, etraptl1 @@ -281,15 +234,10 @@ sun4v_iacc_tl1: /* Data Access Exception, tl0. */ sun4v_dacc: - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 sllx %g3, 16, %g3 or %g5, %g3, %g5 ba,pt %xcc, etrap @@ -302,15 +250,10 @@ sun4v_dacc: /* Data Access Exception, tl1. */ sun4v_dacc_tl1: - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 sllx %g3, 16, %g3 or %g5, %g3, %g5 ba,pt %xcc, etraptl1 @@ -323,15 +266,10 @@ sun4v_dacc_tl1: /* Memory Address Unaligned. */ sun4v_mna: - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 + ldxa [%g0] ASI_SCRATCHPAD, %g2 mov HV_FAULT_TYPE_UNALIGNED, %g3 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 sllx %g3, 16, %g3 or %g5, %g3, %g5 @@ -359,15 +297,10 @@ sun4v_privact: /* Unaligned ldd float, tl0. */ sun4v_lddfmna: - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 sllx %g3, 16, %g3 or %g5, %g3, %g5 ba,pt %xcc, etrap @@ -380,15 +313,10 @@ sun4v_lddfmna: /* Unaligned std float, tl0. */ sun4v_stdfmna: - mov SCRATCHPAD_CPUID, %g1 - ldxa [%g1] ASI_SCRATCHPAD, %g3 - sethi %hi(trap_block), %g2 - or %g2, %lo(trap_block), %g2 - sllx %g3, TRAP_BLOCK_SZ_SHIFT, %g3 - add %g2, %g3, %g2 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_TYPE_OFFSET], %g3 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_ADDR_OFFSET], %g4 - ldx [%g2 + TRAP_PER_CPU_FAULT_INFO + HV_FAULT_D_CTX_OFFSET], %g5 + ldxa [%g0] ASI_SCRATCHPAD, %g2 + ldx [%g2 + HV_FAULT_D_TYPE_OFFSET], %g3 + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4 + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5 sllx %g3, 16, %g3 or %g5, %g3, %g5 ba,pt %xcc, etrap diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index c476f5b321fb..88382200c7b8 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -389,10 +389,35 @@ after_lock_tlb: or %o1, PSTATE_IE, %o1 wrpr %o1, 0, %pstate - call prom_set_trap_table + sethi %hi(is_sun4v), %o0 + lduw [%o0 + %lo(is_sun4v)], %o0 + brz,pt %o0, 1f + nop + + TRAP_LOAD_TRAP_BLOCK(%g2, %g3) + add %g2, TRAP_PER_CPU_FAULT_INFO, %g2 + stxa %g2, [%g0] ASI_SCRATCHPAD + + /* Compute physical address: + * + * paddr = kern_base + (mmfsa_vaddr - KERNBASE) + */ + sethi %hi(KERNBASE), %g3 + sub %g2, %g3, %g2 + sethi %hi(kern_base), %g3 + ldx [%g3 + %lo(kern_base)], %g3 + add %g2, %g3, %o1 + + call prom_set_trap_table_sun4v + sethi %hi(sparc64_ttable_tl0), %o0 + + ba,pt %xcc, 2f + nop + +1: call prom_set_trap_table sethi %hi(sparc64_ttable_tl0), %o0 - call smp_callin +2: call smp_callin nop call cpu_idle mov 0, %o0 diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 7faba33202a9..88eb6f6be562 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1109,24 +1109,6 @@ static void __init tsb_phys_patch(void) } } -/* Register this cpu's fault status area with the hypervisor. */ -void __cpuinit sun4v_register_fault_status(void) -{ - register unsigned long func asm("%o5"); - register unsigned long arg0 asm("%o0"); - int cpu = hard_smp_processor_id(); - struct trap_per_cpu *tb = &trap_block[cpu]; - unsigned long pa; - - pa = kern_base + ((unsigned long) tb - KERNBASE); - func = HV_FAST_MMU_FAULT_AREA_CONF; - arg0 = pa; - __asm__ __volatile__("ta %4" - : "=&r" (func), "=&r" (arg0) - : "0" (func), "1" (arg0), - "i" (HV_FAST_TRAP)); -} - /* paging_init() sets up the page tables */ extern void cheetah_ecache_flush_init(void); @@ -1147,10 +1129,8 @@ void __init paging_init(void) tlb_type == hypervisor) tsb_phys_patch(); - if (tlb_type == hypervisor) { + if (tlb_type == hypervisor) sun4v_patch_tlb_handlers(); - sun4v_register_fault_status(); - } /* Find available physical memory... */ read_obp_memory("available", &pavail[0], &pavail_ents); diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c index 87f5cfce23bb..713cbac5f9bf 100644 --- a/arch/sparc64/prom/misc.c +++ b/arch/sparc64/prom/misc.c @@ -136,6 +136,11 @@ void prom_set_trap_table(unsigned long tba) p1275_cmd("SUNW,set-trap-table", P1275_INOUT(1, 0), tba); } +void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa) +{ + p1275_cmd("SUNW,set-trap-table", P1275_INOUT(2, 0), tba, mmfsa); +} + int prom_get_mmu_ihandle(void) { int node, ret; diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 338b0ca5b519..5a970f5ed9bd 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -156,13 +156,16 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, nop; \ .previous; -/* Clobbers TMP, current address space PGD phys address into DEST. */ -#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ +#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ __GET_CPUID(TMP) \ sethi %hi(trap_block), DEST; \ sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ or DEST, %lo(trap_block), DEST; \ add DEST, TMP, DEST; \ + +/* Clobbers TMP, current address space PGD phys address into DEST. */ +#define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; /* Clobbers TMP, loads local processor's IRQ work area into DEST. */ @@ -175,11 +178,8 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, /* Clobbers TMP, loads DEST with current thread info pointer. */ #define TRAP_LOAD_THREAD_REG(DEST, TMP) \ - __GET_CPUID(TMP) \ - sethi %hi(trap_block), DEST; \ - sllx TMP, TRAP_BLOCK_SZ_SHIFT, TMP; \ - or DEST, %lo(trap_block), DEST; \ - ldx [DEST + TMP], DEST; + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_THREAD], DEST; /* Given the current thread info pointer in THR, load the per-cpu * area base of the current processor into DEST. REG1, REG2, and REG3 are @@ -201,13 +201,13 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #else -#define __GET_CPUID(REG) \ - mov 0, REG; +#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + sethi %hi(trap_block), DEST; \ + or DEST, %lo(trap_block), DEST; \ /* Uniprocessor versions, we know the cpuid is zero. */ #define TRAP_LOAD_PGD_PHYS(DEST, TMP) \ - sethi %hi(trap_block), DEST; \ - or DEST, %lo(trap_block), DEST; \ + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ ldx [DEST + TRAP_PER_CPU_PGD_PADDR], DEST; #define TRAP_LOAD_IRQ_WORK(DEST, TMP) \ @@ -215,8 +215,8 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, or DEST, %lo(__irq_work), DEST; #define TRAP_LOAD_THREAD_REG(DEST, TMP) \ - sethi %hi(trap_block), DEST; \ - ldx [DEST + %lo(trap_block)], DEST; + TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ + ldx [DEST + TRAP_PER_CPU_THREAD], DEST; /* No per-cpu areas on uniprocessor, so no need to load DEST. */ #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h index 2ea545b931b0..ce5066ef2dd0 100644 --- a/include/asm-sparc64/oplib.h +++ b/include/asm-sparc64/oplib.h @@ -338,6 +338,7 @@ int cpu_find_by_mid(int mid, int *prom_node); /* Client interface level routines. */ extern void prom_set_trap_table(unsigned long tba); +extern void prom_set_trap_table_sun4v(unsigned long tba, unsigned long mmfsa); extern long p1275_cmd(const char *, long, ...); diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h index 972f913709a3..6bb86a7a5b42 100644 --- a/include/asm-sparc64/ttable.h +++ b/include/asm-sparc64/ttable.h @@ -180,25 +180,25 @@ #define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl) #endif -#define SUN4V_ITSB_MISS \ - mov SCRATCHPAD_CPUID, %g1; \ - ldxa [%g1] ASI_SCRATCHPAD, %g2; \ - ldxa [%g1 + %g1] ASI_SCRATCHPAD, %g1;\ - sethi %hi(trap_block), %g5; \ - sllx %g2, TRAP_BLOCK_SZ_SHIFT, %g2; \ - or %g5, %lo(trap_block), %g5; \ - ba,pt %xcc, sun4v_itsb_miss; \ - add %g5, %g2, %g5; +#define SUN4V_ITSB_MISS \ + ldxa [%g0] ASI_SCRATCHPAD, %g2; \ + ldx [%g2 + HV_FAULT_I_ADDR_OFFSET], %g4; \ + ldx [%g2 + HV_FAULT_I_CTX_OFFSET], %g5; \ + srlx %g4, 22, %g7; \ + sllx %g5, 48, %g6; \ + brz,pn %g5, kvmap_itlb_4v; \ + or %g6, %g7, %g6; \ + ba,a,pt %xcc, sun4v_itsb_miss; #define SUN4V_DTSB_MISS \ - mov SCRATCHPAD_CPUID, %g1; \ - ldxa [%g1] ASI_SCRATCHPAD, %g2; \ - ldxa [%g1 + %g1] ASI_SCRATCHPAD, %g1;\ - sethi %hi(trap_block), %g5; \ - sllx %g2, TRAP_BLOCK_SZ_SHIFT, %g2; \ - or %g5, %lo(trap_block), %g5; \ - ba,pt %xcc, sun4v_dtsb_miss; \ - add %g5, %g2, %g5; + ldxa [%g0] ASI_SCRATCHPAD, %g2; \ + ldx [%g2 + HV_FAULT_D_ADDR_OFFSET], %g4; \ + ldx [%g2 + HV_FAULT_D_CTX_OFFSET], %g5; \ + srlx %g4, 22, %g7; \ + sllx %g5, 48, %g6; \ + brz,pn %g5, kvmap_dtlb_4v; \ + or %g6, %g7, %g6; \ + ba,a,pt %xcc, sun4v_dtsb_miss; /* Before touching these macros, you owe it to yourself to go and * see how arch/sparc64/kernel/winfixup.S works... -DaveM -- cgit v1.2.3-59-g8ed1b From 72aff53f1fe74153eccef303ab2f79de888d248c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Feb 2006 01:29:17 -0800 Subject: [SPARC64]: Get SUN4V SMP working. The sibling cpu bringup is extremely fragile. We can only perform the most basic calls until we take over the trap table from the firmware/hypervisor on the new cpu. This means no accesses to %g4, %g5, %g6 since those can't be TLB translated without our trap handlers. In order to achieve this: 1) Change sun4v_init_mondo_queues() so that it can operate in several modes. It can allocate the queues, or install them in the current processor, or both. The boot cpu does both in it's call early on. Later, the boot cpu allocates the sibling cpu queue, starts the sibling cpu, then the sibling cpu loads them in. 2) init_cur_cpu_trap() is changed to take the current_thread_info() as an argument instead of reading %g6 directly on the current cpu. 3) Create a trampoline stack for the sibling cpus. We do our basic kernel calls using this stack, which is locked into the kernel image, then go to our proper thread stack after taking over the trap table. 4) While we are in this delicate startup state, we put 0xdeadbeef into %g4/%g5/%g6 in order to catch accidental accesses. 5) On the final prom_set_trap_table*() call, we put &init_thread_union into %g6. This is a hack to make prom_world(0) work. All that wants to do is restore the %asi register using get_thread_current_ds(). Longer term we should just do the OBP calls to set the trap table by hand just like we do for everything else. This would avoid that silly prom_world(0) issue, then we can remove the init_thread_union hack. Signed-off-by: David S. Miller --- arch/sparc64/kernel/irq.c | 30 ++++++++----- arch/sparc64/kernel/setup.c | 2 +- arch/sparc64/kernel/smp.c | 6 +++ arch/sparc64/kernel/trampoline.S | 92 ++++++++++++++++++++++++---------------- arch/sparc64/kernel/traps.c | 4 +- include/asm-sparc64/cpudata.h | 2 +- 6 files changed, 85 insertions(+), 51 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index 6eb44ca5dba6..bb0bb34555da 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -1018,21 +1018,29 @@ static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_ } /* Allocate and register the mondo and error queues for this cpu. */ -void __cpuinit sun4v_init_mondo_queues(int use_bootmem) +void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load) { - int cpu = hard_smp_processor_id(); struct trap_per_cpu *tb = &trap_block[cpu]; - alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem); - alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem); - alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem); - alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem); - alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem); - alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem); + if (alloc) { + alloc_one_mondo(&tb->cpu_mondo_pa, use_bootmem); + alloc_one_mondo(&tb->dev_mondo_pa, use_bootmem); + alloc_one_mondo(&tb->resum_mondo_pa, use_bootmem); + alloc_one_kbuf(&tb->resum_kernel_buf_pa, use_bootmem); + alloc_one_mondo(&tb->nonresum_mondo_pa, use_bootmem); + alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, use_bootmem); - init_cpu_send_mondo_info(tb, use_bootmem); + init_cpu_send_mondo_info(tb, use_bootmem); + } - sun4v_register_mondo_queues(cpu); + if (load) { + if (cpu != hard_smp_processor_id()) { + prom_printf("SUN4V: init mondo on cpu %d not %d\n", + cpu, hard_smp_processor_id()); + prom_halt(); + } + sun4v_register_mondo_queues(cpu); + } } /* Only invoked on boot processor. */ @@ -1043,7 +1051,7 @@ void __init init_IRQ(void) memset(&ivector_table[0], 0, sizeof(ivector_table)); if (tlb_type == hypervisor) - sun4v_init_mondo_queues(1); + sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1); /* We need to clear any IRQ's pending in the soft interrupt * registers, a spurious one could be left around from the diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 06807cf95ee1..9b0c409d5b6a 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -384,7 +384,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); /* Get boot processor trap_block[] setup. */ - init_cur_cpu_trap(); + init_cur_cpu_trap(current_thread_info()); } static int __init set_preferred_console(void) diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 527dfd7ae210..b586345fe3b9 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -316,6 +316,8 @@ static void smp_synchronize_one_tick(int cpu) spin_unlock_irqrestore(&itc_sync_lock, flags); } +extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); + extern unsigned long sparc64_cpu_startup; /* The OBP cpu startup callback truncates the 3rd arg cookie to @@ -339,6 +341,9 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) cpu_set(cpu, cpu_callout_map); if (tlb_type == hypervisor) { + /* Alloc the mondo queues, cpu will load them. */ + sun4v_init_mondo_queues(0, cpu, 1, 0); + prom_startcpu_cpuid(cpu, entry, cookie); } else { int cpu_node; @@ -352,6 +357,7 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) break; udelay(100); } + if (callin_flag) { ret = 0; } else { diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index b9c9f54b0a00..a4dc01a3d238 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -30,12 +30,16 @@ itlb_load: dtlb_load: .asciz "SUNW,dtlb-load" + /* XXX __cpuinit this thing XXX */ +#define TRAMP_STACK_SIZE 1024 + .align 16 +tramp_stack: + .skip TRAMP_STACK_SIZE + .text .align 8 .globl sparc64_cpu_startup, sparc64_cpu_startup_end sparc64_cpu_startup: - flushw - BRANCH_IF_SUN4V(g1, niagara_startup) BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup) BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup) @@ -58,6 +62,7 @@ cheetah_startup: or %g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5 stxa %g5, [%g0] ASI_DCU_CONTROL_REG membar #Sync + /* fallthru */ cheetah_generic_startup: mov TSB_EXTENSION_P, %g3 @@ -90,19 +95,17 @@ spitfire_startup: membar #Sync startup_continue: - wrpr %g0, 15, %pil - sethi %hi(0x80000000), %g2 sllx %g2, 32, %g2 wr %g2, 0, %tick_cmpr + mov %o0, %l0 + BRANCH_IF_SUN4V(g1, niagara_lock_tlb) /* Call OBP by hand to lock KERNBASE into i/d tlbs. * We lock 2 consequetive entries if we are 'bigkernel'. */ - mov %o0, %l0 - sethi %hi(prom_entry_lock), %g2 1: ldstub [%g2 + %lo(prom_entry_lock)], %g1 membar #StoreLoad | #StoreStore @@ -112,7 +115,6 @@ startup_continue: sethi %hi(p1275buf), %g2 or %g2, %lo(p1275buf), %g2 ldx [%g2 + 0x10], %l2 - mov %sp, %l1 add %l2, -(192 + 128), %sp flushw @@ -308,18 +310,9 @@ niagara_lock_tlb: ta HV_FAST_TRAP after_lock_tlb: - mov %l1, %sp - flushw - - mov %l0, %o0 - wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate wr %g0, 0, %fprs - /* XXX Buggy PROM... */ - srl %o0, 0, %o0 - ldx [%o0], %g6 - wr %g0, ASI_P, %asi mov PRIMARY_CONTEXT, %g7 @@ -341,22 +334,25 @@ after_lock_tlb: membar #Sync - mov 1, %g5 - sllx %g5, THREAD_SHIFT, %g5 - sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 - add %g6, %g5, %sp + /* Everything we do here, until we properly take over the + * trap table, must be done with extreme care. We cannot + * make any references to %g6 (current thread pointer), + * %g4 (current task pointer), or %g5 (base of current cpu's + * per-cpu area) until we properly take over the trap table + * from the firmware and hypervisor. + * + * Get onto temporary stack which is in the locked kernel image. + */ + sethi %hi(tramp_stack), %g1 + or %g1, %lo(tramp_stack), %g1 + add %g1, TRAMP_STACK_SIZE, %g1 + sub %g1, STACKFRAME_SZ + STACK_BIAS, %sp mov 0, %fp - wrpr %g0, 0, %wstate - wrpr %g0, 0, %tl - - /* Load TBA, then we can resurface. */ - sethi %hi(sparc64_ttable_tl0), %g5 - wrpr %g5, %tba - - ldx [%g6 + TI_TASK], %g4 - - wrpr %g0, 0, %wstate + /* Put garbage in these registers to trap any access to them. */ + set 0xdeadbeef, %g4 + set 0xdeadbeef, %g5 + set 0xdeadbeef, %g6 call init_irqwork_curcpu nop @@ -367,11 +363,17 @@ after_lock_tlb: bne,pt %icc, 1f nop + call hard_smp_processor_id + nop + + mov %o0, %o1 + mov 0, %o0 + mov 0, %o2 call sun4v_init_mondo_queues - mov 0, %o0 + mov 1, %o3 1: call init_cur_cpu_trap - nop + ldx [%l0], %o0 /* Start using proper page size encodings in ctx register. */ sethi %hi(sparc64_kern_pri_context), %g3 @@ -386,9 +388,14 @@ after_lock_tlb: membar #Sync - rdpr %pstate, %o1 - or %o1, PSTATE_IE, %o1 - wrpr %o1, 0, %pstate + wrpr %g0, 0, %wstate + + /* As a hack, put &init_thread_union into %g6. + * prom_world() loads from here to restore the %asi + * register. + */ + sethi %hi(init_thread_union), %g6 + or %g6, %lo(init_thread_union), %g6 sethi %hi(is_sun4v), %o0 lduw [%o0 + %lo(is_sun4v)], %o0 @@ -418,7 +425,20 @@ after_lock_tlb: 1: call prom_set_trap_table sethi %hi(sparc64_ttable_tl0), %o0 -2: call smp_callin +2: ldx [%l0], %g6 + ldx [%g6 + TI_TASK], %g4 + + mov 1, %g5 + sllx %g5, THREAD_SHIFT, %g5 + sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5 + add %g6, %g5, %sp + mov 0, %fp + + rdpr %pstate, %o1 + or %o1, PSTATE_IE, %o1 + wrpr %o1, 0, %pstate + + call smp_callin nop call cpu_idle mov 0, %o0 diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 5956d0a94009..c9484ae5bb8f 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -2413,12 +2413,12 @@ struct trap_per_cpu trap_block[NR_CPUS]; /* This can get invoked before sched_init() so play it super safe * and use hard_smp_processor_id(). */ -void init_cur_cpu_trap(void) +void init_cur_cpu_trap(struct thread_info *t) { int cpu = hard_smp_processor_id(); struct trap_per_cpu *p = &trap_block[cpu]; - p->thread = current_thread_info(); + p->thread = t; p->pgd_paddr = 0; } diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 5a970f5ed9bd..771aa94dfd95 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -77,7 +77,7 @@ struct trap_per_cpu { unsigned long __pad2[4]; } __attribute__((aligned(64))); extern struct trap_per_cpu trap_block[NR_CPUS]; -extern void init_cur_cpu_trap(void); +extern void init_cur_cpu_trap(struct thread_info *); extern void setup_tba(void); #ifdef CONFIG_SMP -- cgit v1.2.3-59-g8ed1b From ebd8c56c5ae154e2c6cfb7453a76a4e7265b2377 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 17 Feb 2006 08:38:06 -0800 Subject: [SPARC64]: Fix uniprocessor IRQ targetting on SUN4V. We need to use the real hardware processor ID when targetting interrupts, not the "define to 0" thing the uniprocessor build gives us. Also, fill in the Node-ID and Agent-ID fields properly on sun4u/Safari. Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 4 +- arch/sparc64/kernel/irq.c | 98 +++++++++++++++++++++---------------------- arch/sparc64/kernel/setup.c | 6 +-- include/asm-sparc64/cpudata.h | 6 +-- include/asm-sparc64/irq.h | 3 ++ 5 files changed, 58 insertions(+), 59 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index f5c8a293979f..bd332e41532f 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1692,10 +1692,12 @@ __flushw_user: #ifdef CONFIG_SMP .globl hard_smp_processor_id hard_smp_processor_id: +#endif + .globl real_hard_smp_processor_id +real_hard_smp_processor_id: __GET_CPUID(%o0) retl nop -#endif /* %o0: devhandle * %o1: devino diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index bb0bb34555da..712b16cdd5fb 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -138,11 +138,48 @@ out_unlock: return 0; } +extern unsigned long real_hard_smp_processor_id(void); + +static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) +{ + unsigned int tid; + + if (this_is_starfire) { + tid = starfire_translate(imap, cpuid); + tid <<= IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } else { + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + unsigned long ver; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_JBUS; + } else { + unsigned int a = cpuid & 0x1f; + unsigned int n = (cpuid >> 5) & 0x1f; + + tid = ((a << IMAP_AID_SHIFT) | + (n << IMAP_NID_SHIFT)); + tid &= (IMAP_AID_SAFARI | + IMAP_NID_SAFARI);; + } + } else { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } + } + + return tid; +} + /* Now these are always passed a true fully specified sun4u INO. */ void enable_irq(unsigned int irq) { struct ino_bucket *bucket = __bucket(irq); - unsigned long imap; + unsigned long imap, cpuid; imap = bucket->imap; if (imap == 0UL) @@ -150,54 +187,25 @@ void enable_irq(unsigned int irq) preempt_disable(); + /* This gets the physical processor ID, even on uniprocessor, + * so we can always program the interrupt target correctly. + */ + cpuid = real_hard_smp_processor_id(); + if (tlb_type == hypervisor) { unsigned int ino = __irq_ino(irq); - int cpu = hard_smp_processor_id(); int err; - err = sun4v_intr_settarget(ino, cpu); + err = sun4v_intr_settarget(ino, cpuid); if (err != HV_EOK) - printk("sun4v_intr_settarget(%x,%d): err(%d)\n", - ino, cpu, err); + printk("sun4v_intr_settarget(%x,%lu): err(%d)\n", + ino, cpuid, err); err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED); if (err != HV_EOK) printk("sun4v_intr_setenabled(%x): err(%d)\n", ino, err); } else { - unsigned long tid; - - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - unsigned long ver; - - __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - if ((ver >> 32) == __JALAPENO_ID || - (ver >> 32) == __SERRANO_ID) { - /* We set it to our JBUS ID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_JBUS_CONFIG)); - tid = ((tid & (0x1fUL<<17)) << 9); - tid &= IMAP_TID_JBUS; - } else { - /* We set it to our Safari AID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i"(ASI_SAFARI_CONFIG)); - tid = ((tid & (0x3ffUL<<17)) << 9); - tid &= IMAP_AID_SAFARI; - } - } else if (this_is_starfire == 0) { - /* We set it to our UPA MID. */ - __asm__ __volatile__("ldxa [%%g0] %1, %0" - : "=r" (tid) - : "i" (ASI_UPA_CONFIG)); - tid = ((tid & UPA_CONFIG_MID) << 9); - tid &= IMAP_TID_UPA; - } else { - tid = (starfire_translate(imap, - smp_processor_id()) << 26); - tid &= IMAP_TID_UPA; - } + unsigned int tid = sun4u_compute_tid(imap, cpuid); /* NOTE NOTE NOTE, IGN and INO are read-only, IGN is a product * of this SYSIO's preconfigured IGN in the SYSIO Control @@ -817,18 +825,8 @@ static int retarget_one_irq(struct irqaction *p, int goal_cpu) sun4v_intr_setenabled(ino, HV_INTR_ENABLED); } else { unsigned long imap = bucket->imap; - unsigned int tid; + unsigned int tid = sun4u_compute_tid(imap, goal_cpu); - if (tlb_type == cheetah || tlb_type == cheetah_plus) { - tid = goal_cpu << 26; - tid &= IMAP_AID_SAFARI; - } else if (this_is_starfire == 0) { - tid = goal_cpu << 26; - tid &= IMAP_TID_UPA; - } else { - tid = (starfire_translate(imap, goal_cpu) << 26); - tid &= IMAP_TID_UPA; - } upa_writel(tid | IMAP_VALID, imap); } diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 9b0c409d5b6a..77066f1bbe2f 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -222,7 +222,6 @@ static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; static void __init per_cpu_patch(void) { -#ifdef CONFIG_SMP struct cpuid_patch_entry *p; unsigned long ver; int is_jbus; @@ -233,8 +232,8 @@ static void __init per_cpu_patch(void) is_jbus = 0; if (tlb_type != hypervisor) { __asm__ ("rdpr %%ver, %0" : "=r" (ver)); - is_jbus = ((ver >> 32) == __JALAPENO_ID || - (ver >> 32) == __SERRANO_ID); + is_jbus = ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID); } p = &__cpuid_patch; @@ -279,7 +278,6 @@ static void __init per_cpu_patch(void) p++; } -#endif } static void __init sun4v_patch(void) diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 771aa94dfd95..84656f1895cd 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -80,7 +80,6 @@ extern struct trap_per_cpu trap_block[NR_CPUS]; extern void init_cur_cpu_trap(struct thread_info *); extern void setup_tba(void); -#ifdef CONFIG_SMP struct cpuid_patch_entry { unsigned int addr; unsigned int cheetah_safari[4]; @@ -89,7 +88,6 @@ struct cpuid_patch_entry { unsigned int sun4v[4]; }; extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end; -#endif struct sun4v_1insn_patch_entry { unsigned int addr; @@ -123,8 +121,6 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, #include -#ifdef CONFIG_SMP - #define __GET_CPUID(REG) \ /* Spitfire implementation (default). */ \ 661: ldxa [%g0] ASI_UPA_CONFIG, REG; \ @@ -156,6 +152,8 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, nop; \ .previous; +#ifdef CONFIG_SMP + #define TRAP_LOAD_TRAP_BLOCK(DEST, TMP) \ __GET_CPUID(TMP) \ sethi %hi(trap_block), DEST; \ diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h index 529a9df1ad43..de33d6e1afb5 100644 --- a/include/asm-sparc64/irq.h +++ b/include/asm-sparc64/irq.h @@ -72,8 +72,11 @@ struct ino_bucket { #define IMAP_VALID 0x80000000 /* IRQ Enabled */ #define IMAP_TID_UPA 0x7c000000 /* UPA TargetID */ #define IMAP_TID_JBUS 0x7c000000 /* JBUS TargetID */ +#define IMAP_TID_SHIFT 26 #define IMAP_AID_SAFARI 0x7c000000 /* Safari AgentID */ +#define IMAP_AID_SHIFT 26 #define IMAP_NID_SAFARI 0x03e00000 /* Safari NodeID */ +#define IMAP_NID_SHIFT 21 #define IMAP_IGN 0x000007c0 /* IRQ Group Number */ #define IMAP_INO 0x0000003f /* IRQ Number */ #define IMAP_INR 0x000007ff /* Full interrupt number*/ -- cgit v1.2.3-59-g8ed1b From 1bd0cd74d102a527b2a72907698d73fad4b82cbd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 21 Feb 2006 15:41:01 -0800 Subject: [SPARC64]: Kill cpudata->idle_volume. Set, but never used. We used to use this for dynamic IRQ retargetting, but that code died a long time ago. Signed-off-by: David S. Miller --- arch/sparc64/kernel/process.c | 10 ++-------- arch/sparc64/kernel/smp.c | 2 -- include/asm-sparc64/cpudata.h | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) (limited to 'include/asm-sparc64/cpudata.h') diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index d00cb7ad89b9..1ab8283efc4c 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -56,6 +56,8 @@ void default_idle(void) { } + + #ifndef CONFIG_SMP /* @@ -104,19 +106,11 @@ void cpu_idle(void) while(1) { if (need_resched()) { - cpuinfo->idle_volume = 0; preempt_enable_no_resched(); schedule(); preempt_disable(); check_pgt_cache(); } - cpuinfo->idle_volume++; - - /* The store ordering is so that IRQ handlers on - * other cpus see our increasing idleness for the buddy - * redistribution algorithm. -DaveM - */ - membar_storeload_storestore(); } } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 356d423ae14d..0cd9b16612e7 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -88,8 +88,6 @@ void __init smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); - cpu_data(id).idle_volume = 1; - def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024)); cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size", def); diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index 84656f1895cd..c66a81bbc84d 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -19,7 +19,7 @@ typedef struct { unsigned int __softirq_pending; /* must be 1st, see rtrap.S */ unsigned int multiplier; unsigned int counter; - unsigned int idle_volume; + unsigned int __pad1; unsigned long clock_tick; /* %tick's per second */ unsigned long udelay_val; -- cgit v1.2.3-59-g8ed1b